GCC Code Coverage Report


Directory: ./
File: strings/ctype-uca.cc
Date: 2022-12-06 21:40:42
Exec Total Coverage
Lines: 1622 1782 91.0%
Branches: 2616 9044 28.9%

Line Branch Exec Source
1 /* Copyright (c) 2004, 2022, Oracle and/or its affiliates.
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public
5 License, version 2.0, as published by the Free Software Foundation.
6
7 This library is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the library and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Library General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU Library General Public
25 License along with this library; if not, write to the Free
26 Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
27 MA 02110-1301 USA */
28
29 /*
30 UCA (Unicode Collation Algorithm) support.
31
32 Features that are not implemented yet:
33 - No Normalization Form D is done
34 + No decomposition is done
35 + No Thai/Lao ordering is done
36 - No combining marks processing is done
37 */
38
39 #include <assert.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <sys/types.h>
43 #include <algorithm>
44 #include <bitset>
45 #include <iterator>
46 #include <map>
47 #include <utility>
48
49 #include "m_ctype.h"
50 #include "m_string.h"
51 #include "my_byteorder.h"
52 #include "my_compiler.h"
53
54 #include "my_inttypes.h"
55 #include "my_loglevel.h"
56 #include "my_macros.h"
57 #include "mysys_err.h"
58 #include "strings/mb_wc.h"
59 #include "strings/str_uca_type.h"
60 #include "strings/uca900_data.h"
61 #include "strings/uca900_ja_data.h"
62 #include "strings/uca900_zh_data.h"
63 #include "strings/uca_data.h"
64 #include "template_utils.h"
65
/*
  UCA data descriptor tagged UCA_V400. Per the field comments below it
  covers code points up to 0xFFFF, points at the uca_length / uca_weight
  tables, and carries no contraction data. The remaining entries are the
  "logical position" code points; the three extra_ce_* bases are unused.
*/
66 MY_UCA_INFO my_uca_v400 = {
67 UCA_V400,
68
69 0xFFFF, /* maxchar */
70 uca_length, uca_weight, false, nullptr, /* contractions */
71 nullptr,
72
73 /* Logical positions */
74 0x0009, /* first_non_ignorable p != ignore */
75 0xA48C, /* last_non_ignorable Not a CJK and not UNASSIGNED */
76
77 0x0332, /* first_primary_ignorable p == 0 */
78 0x20EA, /* last_primary_ignorable */
79
80 0x0000, /* first_secondary_ignorable p,s == 0 */
81 0xFE73, /* last_secondary_ignorable p,s == 0 */
82
83 0x0000, /* first_tertiary_ignorable p,s,t == 0 */
84 0xFE73, /* last_tertiary_ignorable p,s,t == 0 */
85
86 0x0000, /* first_trailing */
87 0x0000, /* last_trailing */
88
89 0x0009, /* first_variable */
90 0x2183, /* last_variable */
91 0, /* extra_ce_pri_base, not used */
92 0, /* extra_ce_sec_base, not used */
93 0 /* extra_ce_ter_base, not used */
94 };
95
96 /******************************************************/
97
/*
  UCA data descriptor tagged UCA_V520. Per the field comments below it
  covers code points up to 0x10FFFF, points at the uca520_length /
  uca520_weight tables, and carries no contraction data. The remaining
  entries are the "logical position" code points; the three extra_ce_*
  bases are unused.
*/
98 MY_UCA_INFO my_uca_v520 = {
99 UCA_V520,
100
101 0x10FFFF, /* maxchar */
102 uca520_length,
103 uca520_weight,
104 false,
105 nullptr, /* contractions */
106 nullptr,
107
108 0x0009, /* first_non_ignorable p != ignore */
109 0x1342E, /* last_non_ignorable Not a CJK and not UNASSIGNED */
110
111 0x0332, /* first_primary_ignorable p == ignore */
112 0x101FD, /* last_primary_ignorable */
113
114 0x0000, /* first_secondary_ignorable p,s == ignore */
115 0xFE73, /* last_secondary_ignorable */
116
117 0x0000, /* first_tertiary_ignorable p,s,t == ignore */
118 0xFE73, /* last_tertiary_ignorable */
119
120 0x0000, /* first_trailing */
121 0x0000, /* last_trailing */
122
123 0x0009, /* first_variable if alt=non-ignorable: p != ignore */
124 0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
125 0, /* extra_ce_pri_base, not used */
126 0, /* extra_ce_sec_base, not used */
127 0 /* extra_ce_ter_base, not used */
128 };
129
130 /******************************************************/
131
132 /*
133 German Phonebook
134 */
135 static const char german2[] =
136 "&AE << \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
137 "&OE << \\u0153 <<< \\u0152 << \\u00F6 <<< \\u00D6 "
138 "&UE << \\u00FC <<< \\u00DC ";
139
140 /*
141 Some sources treat LETTER A WITH DIAERESIS (00E4,00C4)
142 secondary greater than LETTER AE (00E6,00C6).
143 http://www.evertype.com/alphabets/icelandic.pdf
144 http://developer.mimer.com/collations/charts/icelandic.htm
145
146 Other sources do not provide any special rules
147 for LETTER A WITH DIAERESIS:
148 http://www.omniglot.com/writing/icelandic.htm
149 http://en.wikipedia.org/wiki/Icelandic_alphabet
150 http://oss.software.ibm.com/icu/charts/collation/is.html
151
152 Let's go the first way.
153 */
154
155 static const char icelandic[] =
156 "& A < \\u00E1 <<< \\u00C1 "
157 "& D < \\u00F0 <<< \\u00D0 "
158 "& E < \\u00E9 <<< \\u00C9 "
159 "& I < \\u00ED <<< \\u00CD "
160 "& O < \\u00F3 <<< \\u00D3 "
161 "& U < \\u00FA <<< \\u00DA "
162 "& Y < \\u00FD <<< \\u00DD "
163 "& Z < \\u00FE <<< \\u00DE "
164 "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
165 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
166 "< \\u00E5 <<< \\u00C5 ";
167
168 /*
169 Some sources treat I and Y primary different.
170 Other sources treat I and Y the same on primary level.
171 We'll go the first way.
172 */
173
174 static const char latvian[] =
175 "& C < \\u010D <<< \\u010C "
176 "& G < \\u0123 <<< \\u0122 "
177 "& I < \\u0079 <<< \\u0059 "
178 "& K < \\u0137 <<< \\u0136 "
179 "& L < \\u013C <<< \\u013B "
180 "& N < \\u0146 <<< \\u0145 "
181 "& R < \\u0157 <<< \\u0156 "
182 "& S < \\u0161 <<< \\u0160 "
183 "& Z < \\u017E <<< \\u017D ";
184
185 static const char romanian[] =
186 "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
187 "& I < \\u00EE <<< \\u00CE "
188 "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E "
189 "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 ";
190
191 static const char slovenian[] =
192 "& C < \\u010D <<< \\u010C "
193 "& S < \\u0161 <<< \\u0160 "
194 "& Z < \\u017E <<< \\u017D ";
195
196 static const char polish[] =
197 "& A < \\u0105 <<< \\u0104 "
198 "& C < \\u0107 <<< \\u0106 "
199 "& E < \\u0119 <<< \\u0118 "
200 "& L < \\u0142 <<< \\u0141 "
201 "& N < \\u0144 <<< \\u0143 "
202 "& O < \\u00F3 <<< \\u00D3 "
203 "& S < \\u015B <<< \\u015A "
204 "& Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B";
205
206 static const char estonian[] =
207 "& S < \\u0161 <<< \\u0160 "
208 " < \\u007A <<< \\u005A "
209 " < \\u017E <<< \\u017D "
210 "& W < \\u00F5 <<< \\u00D5 "
211 "< \\u00E4 <<< \\u00C4 "
212 "< \\u00F6 <<< \\u00D6 "
213 "< \\u00FC <<< \\u00DC ";
214
215 // Standard Spanish, also for Galician.
216 static const char spanish[] = "& N < \\u00F1 <<< \\u00D1 ";
217
218 /*
219 Some sources treat V and W as similar on primary level.
220 We'll treat V and W as different on primary level.
221 */
222
223 static const char swedish[] =
224 "& Y <<\\u00FC <<< \\u00DC "
225 "& Z < \\u00E5 <<< \\u00C5 "
226 "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 "
227 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 ";
228
229 static const char turkish[] =
230 "& C < \\u00E7 <<< \\u00C7 "
231 "& G < \\u011F <<< \\u011E "
232 "& H < \\u0131 <<< \\u0049 "
233 "& O < \\u00F6 <<< \\u00D6 "
234 "& S < \\u015F <<< \\u015E "
235 "& U < \\u00FC <<< \\u00DC ";
236
237 static const char czech[] =
238 "& C < \\u010D <<< \\u010C "
239 "& H < ch <<< Ch <<< CH"
240 "& R < \\u0159 <<< \\u0158"
241 "& S < \\u0161 <<< \\u0160"
242 "& Z < \\u017E <<< \\u017D";
243
244 static const char danish[] = /* Also good for Norwegian */
245 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"
246 "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4"
247 " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
248 " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA";
249
250 static const char lithuanian[] =
251 "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C"
252 "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116"
253 "& I << y <<< Y"
254 "& S < \\u0161 <<< \\u0160"
255 "& Z < \\u017E <<< \\u017D";
256
257 static const char slovak[] =
258 "& A < \\u00E4 <<< \\u00C4"
259 "& C < \\u010D <<< \\u010C"
260 "& H < ch <<< Ch <<< CH"
261 "& O < \\u00F4 <<< \\u00D4"
262 "& S < \\u0161 <<< \\u0160"
263 "& Z < \\u017E <<< \\u017D";
264
265 static const char spanish2[] = /* Also good for Asturian and Galician */
266 "&C < ch <<< Ch <<< CH"
267 "&L < ll <<< Ll <<< LL"
268 "&N < \\u00F1 <<< \\u00D1";
269
270 static const char roman[] = /* i.e. Classical Latin */
271 "& I << j <<< J "
272 "& V << u <<< U ";
273
274 /*
275 Persian collation support was provided by
276 Jody McIntyre <mysql@modernduck.com>
277
278 To: internals@lists.mysql.com
279 Subject: Persian UTF8 collation support
280 Date: 17.08.2004
281
282 Contraction is not implemented. Some implementations do perform
283 contraction but others do not, and it is able to sort all my test
284 strings correctly.
285
286 Jody.
287 */
288 static const char persian[] =
289 "& \\u066D < \\u064E < \\uFE76 < \\uFE77 < \\u0650 < \\uFE7A < \\uFE7B"
290 " < \\u064F < \\uFE78 < \\uFE79 < \\u064B < \\uFE70 < \\uFE71"
291 " < \\u064D < \\uFE74 < \\u064C < \\uFE72"
292 "& \\uFE7F < \\u0653 < \\u0654 < \\u0655 < \\u0670"
293 "& \\u0669 < \\u0622 < \\u0627 < \\u0671 < \\u0621 < \\u0623 < \\u0625"
294 " < \\u0624 < \\u0626"
295 "& \\u0642 < \\u06A9 < \\u0643"
296 "& \\u0648 < \\u0647 < \\u0629 < \\u06C0 < \\u06CC < \\u0649 < \\u064A"
297 "& \\uFE80 < \\uFE81 < \\uFE82 < \\uFE8D < \\uFE8E < \\uFB50 < \\uFB51"
298 " < \\uFE80 "
299 /*
300 FE80 appears both in reset and shift.
301 We need to break the rule here and reset to *new* FE80 again,
302 so weight for FE83 is calculated as P[FE80]+1, not as P[FE80]+8.
303 */
304 " & \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85"
305 " < \\uFE86 < \\u0689 < \\u068A"
306 "& \\uFEAE < \\uFDFC"
307 "& \\uFED8 < \\uFB8E < \\uFB8F < \\uFB90 < \\uFB91 < \\uFED9 < \\uFEDA"
308 " < \\uFEDB < \\uFEDC"
309 "& \\uFEEE < \\uFEE9 < \\uFEEA < \\uFEEB < \\uFEEC < \\uFE93 < \\uFE94"
310 " < \\uFBA4 < \\uFBA5 < \\uFBFC < \\uFBFD < \\uFBFE < \\uFBFF"
311 " < \\uFEEF < \\uFEF0 < \\uFEF1 < \\uFEF2 < \\uFEF3 < \\uFEF4"
312 " < \\uFEF5 < \\uFEF6 < \\uFEF7 < \\uFEF8 < \\uFEF9 < \\uFEFA"
313 " < \\uFEFB < \\uFEFC";
314
315 /*
316 Esperanto tailoring.
317 Contributed by Bertilo Wennergren <bertilow at gmail dot com>
318 September 1, 2005
319 */
320 static const char esperanto[] =
321 "& C < \\u0109 <<< \\u0108"
322 "& G < \\u011D <<< \\u011C"
323 "& H < \\u0125 <<< \\u0124"
324 "& J < \\u0135 <<< \\u0134"
325 "& S < \\u015d <<< \\u015c"
326 "& U < \\u016d <<< \\u016c";
327
328 /*
329 A simplified version of Hungarian, without consonant contractions.
330 */
331 static const char hungarian[] =
332 "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
333 "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
334
335 static const char croatian[] =
336 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106"
337 "&D < d\\u017E = \\u01C6 <<< d\\u017D <<< D\\u017E = \\u01C5 <<< D\\u017D "
338 "= \\u01C4"
339 " < \\u0111 <<< \\u0110"
340 "&L < lj = \\u01C9 <<< lJ <<< Lj = \\u01C8 <<< LJ = \\u01C7"
341 "&N < nj = \\u01CC <<< nJ <<< Nj = \\u01CB <<< NJ = \\u01CA"
342 "&S < \\u0161 <<< \\u0160"
343 "&Z < \\u017E <<< \\u017D";
344
345 /*
346 SCCII Part 1 : Collation Sequence (SLS1134)
347 2006/11/24
348 Harshula Jayasuriya <harshula at gmail dot com>
349 Language Technology Research Lab, University of Colombo / ICTA
350 */
351 #if 0
352 static const char sinhala[]=
353 "& \\u0D96 < \\u0D82 < \\u0D83"
354 "& \\u0DA5 < \\u0DA4"
355 "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3"
356 "& \\u0DDE < \\u0DCA";
357 #else
358 static const char sinhala[] =
359 "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D"
360 "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3"
361 "< \\u0DA5 < \\u0DA4 < \\u0DA6"
362 "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC"
363 "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1"
364 "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8"
365 "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1"
366 "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6"
367 "< \\u0DCF"
368 "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6"
369 "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA"
370 "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA";
371 #endif
372
373 static const char vietnamese[] =
374 " &A << \\u00E0 <<< \\u00C0" /* A */
375 " << \\u1EA3 <<< \\u1EA2"
376 " << \\u00E3 <<< \\u00C3"
377 " << \\u00E1 <<< \\u00C1"
378 " << \\u1EA1 <<< \\u1EA0"
379 " < \\u0103 <<< \\u0102" /* A WITH BREVE */
380 " << \\u1EB1 <<< \\u1EB0"
381 " << \\u1EB3 <<< \\u1EB2"
382 " << \\u1EB5 <<< \\u1EB4"
383 " << \\u1EAF <<< \\u1EAE"
384 " << \\u1EB7 <<< \\u1EB6"
385 " < \\u00E2 <<< \\u00C2" /* A WITH CIRCUMFLEX */
386 " << \\u1EA7 <<< \\u1EA6"
387 " << \\u1EA9 <<< \\u1EA8"
388 " << \\u1EAB <<< \\u1EAA"
389 " << \\u1EA5 <<< \\u1EA4"
390 " << \\u1EAD <<< \\u1EAC"
391 " &D < \\u0111 <<< \\u0110" /* D WITH STROKE */
392 " &E << \\u00E8 <<< \\u00C8" /* E */
393 " << \\u1EBB <<< \\u1EBA"
394 " << \\u1EBD <<< \\u1EBC"
395 " << \\u00E9 <<< \\u00C9"
396 " << \\u1EB9 <<< \\u1EB8"
397 " < \\u00EA <<< \\u00CA" /* E WITH CIRCUMFLEX */
398 " << \\u1EC1 <<< \\u1EC0"
399 " << \\u1EC3 <<< \\u1EC2"
400 " << \\u1EC5 <<< \\u1EC4"
401 " << \\u1EBF <<< \\u1EBE"
402 " << \\u1EC7 <<< \\u1EC6"
403 " &I << \\u00EC <<< \\u00CC" /* I */
404 " << \\u1EC9 <<< \\u1EC8"
405 " << \\u0129 <<< \\u0128"
406 " << \\u00ED <<< \\u00CD"
407 " << \\u1ECB <<< \\u1ECA"
408 " &O << \\u00F2 <<< \\u00D2" /* O */
409 " << \\u1ECF <<< \\u1ECE"
410 " << \\u00F5 <<< \\u00D5"
411 " << \\u00F3 <<< \\u00D3"
412 " << \\u1ECD <<< \\u1ECC"
413 " < \\u00F4 <<< \\u00D4" /* O WITH CIRCUMFLEX */
414 " << \\u1ED3 <<< \\u1ED2"
415 " << \\u1ED5 <<< \\u1ED4"
416 " << \\u1ED7 <<< \\u1ED6"
417 " << \\u1ED1 <<< \\u1ED0"
418 " << \\u1ED9 <<< \\u1ED8"
419 " < \\u01A1 <<< \\u01A0" /* O WITH HORN */
420 " << \\u1EDD <<< \\u1EDC"
421 " << \\u1EDF <<< \\u1EDE"
422 " << \\u1EE1 <<< \\u1EE0"
423 " << \\u1EDB <<< \\u1EDA"
424 " << \\u1EE3 <<< \\u1EE2"
425 " &U << \\u00F9 <<< \\u00D9" /* U */
426 " << \\u1EE7 <<< \\u1EE6"
427 " << \\u0169 <<< \\u0168"
428 " << \\u00FA <<< \\u00DA"
429 " << \\u1EE5 <<< \\u1EE4"
430 " < \\u01B0 <<< \\u01AF" /* U WITH HORN */
431 " << \\u1EEB <<< \\u1EEA"
432 " << \\u1EED <<< \\u1EEC"
433 " << \\u1EEF <<< \\u1EEE"
434 " << \\u1EE9 <<< \\u1EE8"
435 " << \\u1EF1 <<< \\u1EF0"
436 " &Y << \\u1EF3 <<< \\u1EF2" /* Y */
437 " << \\u1EF7 <<< \\u1EF6"
438 " << \\u1EF9 <<< \\u1EF8"
439 " << \\u00FD <<< \\u00DD"
440 " << \\u1EF5 <<< \\u1EF4";
441
442 /* German Phonebook */
443 static const char de_pb_cldr_30[] =
444 "&AE << \\u00E4 <<< \\u00C4 "
445 "&OE << \\u00F6 <<< \\u00D6 "
446 "&UE << \\u00FC <<< \\u00DC ";
447
448 /* Icelandic */
449 static const char is_cldr_30[] =
450 "&[before 1]b < \\u00E1 <<< \\u00C1 "
451 "& d << \\u0111 <<< \\u0110 < \\u00F0 <<< \\u00D0 "
452 "&[before 1]f < \\u00E9 <<< \\u00C9 "
453 "&[before 1]j < \\u00ED <<< \\u00CD "
454 "&[before 1]p < \\u00F3 <<< \\u00D3 "
455 "&[before 1]v < \\u00FA <<< \\u00DA "
456 "&[before 1]z < \\u00FD <<< \\u00DD "
457 "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
458 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
459 "< \\u00E5 <<< \\u00C5";
460
461 /* Latvian */
462 static const char lv_cldr_30[] =
463 "&[before 1]D < \\u010D <<< \\u010C "
464 "&[before 1]H < \\u0123 <<< \\u0122 "
465 "& I << y <<< Y "
466 "&[before 1]L < \\u0137 <<< \\u0136 "
467 "&[before 1]M < \\u013C <<< \\u013B "
468 "&[before 1]O < \\u0146 <<< \\u0145 "
469 "&[before 1]S < \\u0157 <<< \\u0156 "
470 "&[before 1]T < \\u0161 <<< \\u0160 "
471 "&[before 1]\\u01B7 < \\u017E <<< \\u017D";
472
473 /* Romanian */
474 static const char ro_cldr_30[] =
475 "&A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
476 "&I < \\u00EE <<< \\u00CE "
477 "&S < \\u015F = \\u0219 <<< \\u015E = \\u0218 "
478 "&T < \\u0163 = \\u021B <<< \\u0162 = \\u021A";
479
480 /* Slovenian */
481 static const char sl_cldr_30[] =
482 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
483 "&D < \\u0111 <<< \\u0110 "
484 "&S < \\u0161 <<< \\u0160 "
485 "&Z < \\u017E <<< \\u017D";
486
487 /* Polish */
488 static const char pl_cldr_30[] =
489 "&A < \\u0105 <<< \\u0104 "
490 "&C < \\u0107 <<< \\u0106 "
491 "&E < \\u0119 <<< \\u0118 "
492 "&L < \\u0142 <<< \\u0141 "
493 "&N < \\u0144 <<< \\u0143 "
494 "&O < \\u00F3 <<< \\u00D3 "
495 "&S < \\u015B <<< \\u015A "
496 "&Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B";
497
498 /* Estonian */
499 static const char et_cldr_30[] =
500 "&[before 1]T < \\u0161 <<< \\u0160 < z <<< Z "
501 "< \\u017E <<< \\u017D "
502 "&[before 1]X < \\u00F5 <<< \\u00D5 < \\u00E4 <<< \\u00C4 "
503 "< \\u00F6 <<< \\u00D6 < \\u00FC <<< \\u00DC";
504
505 /* Swedish */
506 static const char sv_cldr_30[] =
507 "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 "
508 "& t <<< \\u00FE/h "
509 "& T <<< \\u00DE/H "
510 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
511 "&[before 1]\\u01C0 < \\u00E5 <<< \\u00C5 < \\u00E4 <<< \\u00C4 "
512 "<< \\u00E6 <<< \\u00C6 << \\u0119 <<< \\u0118 "
513 "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
514 "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 "
515 "<< \\u00F4 <<< \\u00D4";
516
517 /* Turkish */
518 static const char tr_cldr_30[] =
519 "& C < \\u00E7 <<< \\u00C7 "
520 "& G < \\u011F <<< \\u011E "
521 "&[before 1]i < \\u0131 <<< I "
522 "& i <<< \\u0130 "
523 "& O < \\u00F6 <<< \\u00D6 "
524 "& S < \\u015F <<< \\u015E "
525 "& U < \\u00FC <<< \\u00DC ";
526
527 /* Czech */
528 static const char cs_cldr_30[] =
529 "&C < \\u010D <<< \\u010C "
530 "&H < ch <<< cH <<< Ch <<< CH "
531 "&R < \\u0159 <<< \\u0158"
532 "&S < \\u0161 <<< \\u0160"
533 "&Z < \\u017E <<< \\u017D";
534
535 /* Danish, same for Norwegian */
536 static const char da_cldr_30[] =
537 "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 "
538 "& t <<< \\u00FE/h "
539 "& T <<< \\u00DE/H "
540 "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
541 "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
542 "< \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 "
543 "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 "
544 "< \\u00E5 <<< \\u00C5 <<< aa <<< Aa "
545 "<<< AA";
546
547 static Coll_param da_coll_param = {nullptr, false, CASE_FIRST_UPPER};
548
549 /* CASE FIRST OFF for Norwegian */
550 static Coll_param no_coll_param = {nullptr, false, CASE_FIRST_OFF};
551
552 /* Lithuanian */
553 static const char lt_cldr_30[] =
554 "&\\u0300 = \\u0307\\u0300 "
555 "&\\u0301 = \\u0307\\u0301 "
556 "&\\u0303 = \\u0307\\u0303 "
557 "&A << \\u0105 <<< \\u0104 "
558 "&C < \\u010D <<< \\u010C "
559 "&E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116"
560 "&I << \\u012F <<< \\u012E << y <<< Y "
561 "&S < \\u0161 <<< \\u0160 "
562 "&U << \\u0173 <<< \\u0172 << \\u016B <<< \\u016A "
563 "&Z < \\u017E <<< \\u017D";
564
565 /* Slovak */
566 static const char sk_cldr_30[] =
567 "&A < \\u00E4 <<< \\u00C4 "
568 "&C < \\u010D <<< \\u010C "
569 "&H < ch <<< cH <<< Ch <<< CH "
570 "&O < \\u00F4 <<< \\u00D4 "
571 "&R < \\u0159 <<< \\u0158 "
572 "&S < \\u0161 <<< \\u0160 "
573 "&Z < \\u017E <<< \\u017D";
574
575 /* Spanish (Traditional) */
576 static const char es_trad_cldr_30[] =
577 "&N < \\u00F1 <<< \\u00D1 "
578 "&C < ch <<< Ch <<< CH "
579 "&l < ll <<< Ll <<< LL";
580
581 /* Persian */
582 #if 0
583 static const char fa_cldr_30[]=
584 "& \\u064E << \\u0650 << \\u064F << \\u064B << \\u064D "
585 "<< \\u064C "
586 "&[before 1]\\u0627 < \\u0622 "
587 "& \\u0627 << \\u0671 < \\u0621 << \\u0623 << \\u0672 "
588 "<< \\u0625 << \\u0673 << \\u0624 << \\u06CC\\u0654 "
589 "<<< \\u0649\\u0654 <<< \\u0626 "
590 "& \\u06A9 << \\u06AA << \\u06AB << \\u0643 << \\u06AC "
591 "<< \\u06AD << \\u06AE "
592 "& \\u06CF < \\u0647 << \\u06D5 << \\u06C1 << \\u0629 "
593 "<< \\u06C3 << \\u06C0 << \\u06BE "
594 "& \\u06CC << \\u0649 << \\u06D2 << \\u064A << \\u06D0 "
595 "<< \\u06D1 << \\u06CD << \\u06CE";
596
597 static Reorder_param fa_reorder_param= {
598 {CHARGRP_ARAB, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0
599 };
600
601 static Coll_param fa_coll_param= {
602 &fa_reorder_param, true
603 };
604 #endif
605
606 /* Hungarian */
607 static const char hu_cldr_30[] =
608 "&C < cs <<< Cs <<< CS "
609 "&D < dz <<< Dz <<< DZ "
610 "&DZ < dzs <<< Dzs <<< DZS "
611 "&G < gy <<< Gy <<< GY "
612 "&L < ly <<< Ly <<< LY "
613 "&N < ny <<< Ny <<< NY "
614 "&S < sz <<< Sz <<< SZ "
615 "&T < ty <<< Ty <<< TY "
616 "&Z < zs <<< Zs <<< ZS "
617 "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150 "
618 "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 "
619 "&cs <<< ccs/cs "
620 "&Cs <<< Ccs/cs "
621 "&CS <<< CCS/CS "
622 "&dz <<< ddz/dz "
623 "&Dz <<< Ddz/dz "
624 "&DZ <<< DDZ/DZ "
625 "&dzs<<< ddzs/dzs "
626 "&Dzs<<< Ddzs/dzs "
627 "&DZS<<< DDZS/DZS "
628 "&gy <<< ggy/gy "
629 "&Gy <<< Ggy/gy "
630 "&GY <<< GGY/GY "
631 "&ly <<< lly/ly "
632 "&Ly <<< Lly/ly "
633 "&LY <<< LLY/LY "
634 "&ny <<< nny/ny "
635 "&Ny <<< Nny/ny "
636 "&NY <<< NNY/NY "
637 "&sz <<< ssz/sz "
638 "&Sz <<< Ssz/sz "
639 "&SZ <<< SSZ/SZ "
640 "&ty <<< tty/ty "
641 "&Ty <<< Tty/ty "
642 "&TY <<< TTY/TY "
643 "&zs <<< zzs/zs "
644 "&Zs <<< Zzs/zs "
645 "&ZS <<< ZZS/ZS";
646
647 /* Croatian, same for Serbian with Latin and Bosnian. */
648 static const char hr_cldr_30[] =
649 "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
650 "&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D "
651 "<<< \\u01C4 < \\u0111 <<< \\u0110 "
652 "&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ "
653 "<<< \\u01C7 "
654 "&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ "
655 "<<< \\u01CA "
656 "&S < \\u0161 <<< \\u0160 "
657 "&Z < \\u017E <<< \\u017D ";
658
659 static Reorder_param hr_reorder_param = {
660 {CHARGRP_LATIN, CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0};
661
662 static Coll_param hr_coll_param = {&hr_reorder_param, false, CASE_FIRST_OFF};
663
664 /* Sinhala */
665 #if 0
666 static const char si_cldr_30[]=
667 "&\\u0D96 < \\u0D82 < \\u0D83 "
668 "&\\u0DA5 < \\u0DA4";
669 #endif
670
671 /* Vietnamese */
672 static const char vi_cldr_30[] =
673 "&\\u0300 << \\u0309 << \\u0303 << \\u0301 << \\u0323 "
674 "&a < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
675 "&d < \\u0111 <<< \\u0110 "
676 "&e < \\u00EA <<< \\u00CA "
677 "&o < \\u00F4 <<< \\u00D4 < \\u01A1 <<< \\u01A0 "
678 "&u < \\u01B0 <<< \\u01AF";
679
680 static Coll_param vi_coll_param = {nullptr, true, CASE_FIRST_OFF};
681
682 static Reorder_param ja_reorder_param = {
683 /*
684 Per CLDR 30, Japanese reorder rule is defined as [Latn Kana Hani],
685 but for Hani characters, their weight is implicit according to UCA,
686 which is different from other character groups. We don't add "Hani"
687 below and will have special handling for them in
688 adjust_japanese_weight() and apply_reorder_param(). Implicit weight
689 has two collation elements. To make strnxfrm() run faster, we give
690 Japanese Han characters tailored weight which has only one collation
691 element. These characters' weight is defined in ja_han_pages.
692 */
693 {CHARGRP_LATIN, CHARGRP_KANA, CHARGRP_NONE},
694 {{{0, 0}, {0, 0}}},
695 0,
696 0};
697
698 static Coll_param ja_coll_param = {&ja_reorder_param, false /*norm_enabled*/,
699 CASE_FIRST_OFF};
700
701 /*
702 The Chinese reorder rule is defined as [Hani]. This means all Han characters'
703 weight should be greater than the core group and smaller than any other
704 character groups.
705 The Han characters are separated into two parts. The CLDR collation
706 definition file, zh.xml, defines 41336 Han characters' order, and all other
707 Han characters have implicit weight.
708 The core group characters occupy the weight values from 0x0209 to 0x1C46
709 in DUCET, so we set the weights of all Han characters defined in
710 zh.xml to values from 0x1C47 to 0xBDBE. The smallest weight value of
711 these Han characters, 0x1C47, being the largest weight value of the core
712 group plus one (0x1C46 + 1), ensures these Han characters sort greater than
713 the core group characters.
714 Also, we set the implicit weight to the Han characters like
715 [BDBF - BDC3, 0020, 0002][XXXX, 0000, 0000].
716 To tailor the weight of characters of Latin, Cyrillic and so on to be bigger
717 than all Han characters, we give these characters weights from 0xBDC4 to
718 0xF620. There are many character groups between the core group and the Han
719 group, so it would be a long list if we put them in the following reorder_grp
720 structure. But since it is a very simple weight shift, we put their calculated
721 weight here and do not calculate it in my_prepare_reorder().
722
723 NOTE: We use the zh.xml file from CLDR v33.1 to implement this Chinese
724 collation, because we found that the file of CLDR v30 is missing some very
725 common Han characters (the Han character 'small', etc).
726 */
727 static Reorder_param zh_reorder_param = {
728 {CHARGRP_NONE}, {{{0x1C47, 0x54A3}, {0xBDC4, 0xF620}}}, 1, 0x54A3};
729
730 static Coll_param zh_coll_param = {&zh_reorder_param, false, CASE_FIRST_OFF};
731
732 /* Russian, same for Bulgarian and Mongolian with Cyrillic letters */
733 static Reorder_param ru_reorder_param = {
734 {CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0};
735
736 static Coll_param ru_coll_param = {&ru_reorder_param, false /*norm_enabled*/,
737 CASE_FIRST_OFF};
738
739 static constexpr uint16 nochar[] = {0, 0};
740
741 /**
742 Unicode Collation Algorithm:
743 Collation element (weight) scanner,
744 for consecutive scanning of collation
745 weights from a string.
746
747 Only meant as a base class; instantiate uca_scanner_any or uca_scanner_900
748 instead of this.
749 */
750 class my_uca_scanner {
751 protected:
/*
  Set up a scan over the byte range [str, str + length) for the collation
  cs_arg. The UCA data is taken from cs_arg->uca, the current weight
  string starts out pointing at the file-level "nochar" array, and the
  start of the input is remembered a second time in sbeg_dup.
  The constructor is protected, so only derived scanners can be created.
*/
752 11076031406 my_uca_scanner(const CHARSET_INFO *cs_arg, const uchar *str, size_t length)
753 11076031406 : wbeg(nochar),
754 11076031406 sbeg(str),
755 11076031406 send(str + length),
756 11076031406 uca(cs_arg->uca),
757 11076031406 cs(cs_arg),
758 11076031406 sbeg_dup(str) {}
759
760 public:
761 /**
762 Get the level the scanner is currently working on. The string
763 can be scanned multiple times (if the collation requires multi-level
764 comparisons, e.g. for accent or case sensitivity); first to get
765 primary weights, then from the start again for secondary, etc.
766 */
767 38020806387 uint get_weight_level() const { return weight_lv; }
768
769 protected:
770 uint weight_lv{0}; /* 0 = Primary, 1 = Secondary, 2 = Tertiary */
771 const uint16 *wbeg; /* Beginning of the current weight string */
772 uint wbeg_stride{0}; /* Number of bytes between weights in string */
773 const uchar *sbeg; /* Beginning of the input string */
774 const uchar *send; /* End of the input string */
/* UCA data of the collation, copied from cs_arg->uca by the constructor. */
775 const MY_UCA_INFO *uca;
/* NOTE(review): presumably scratch space for implicit weights -- confirm
   against the next_implicit() implementations. */
776 uint16 implicit[10];
777 my_wc_t prev_char{0}; // Previous code point we scanned, if any.
/* The collation this scanner was constructed for. */
778 const CHARSET_INFO *cs;
/* NOTE(review): presumably the number of collation elements still to be
   returned for the current character -- confirm against its users. */
779 uint num_of_ce_left{0};
780 const uchar *sbeg_dup; /* Backup of beginning of input string */
781
782 protected:
/* Contraction lookup helpers; defined outside this class body.
   NOTE(review): exact return contract not visible here -- see definitions. */
783 const uint16 *contraction_find(my_wc_t wc0, size_t *chars_skipped);
784 inline const uint16 *previous_context_find(my_wc_t wc0, my_wc_t wc1);
785 };
786
787 /*
788 Charset dependent scanner part, to optimize
789 some character sets.
790 */
791
/*
  Scanner for collations that do NOT use the UCA 9.0.0 table format: the
  constructor asserts that the collation either has no UCA data or that
  its version is not UCA_V900. Mb_wc is the type of the functor stored in
  mb_wc (presumably the multibyte-to-code-point decoder -- confirm in
  next()).
*/
792 template <class Mb_wc>
793 struct uca_scanner_any : public my_uca_scanner {
794 508351708 uca_scanner_any(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg,
795 const uchar *str, size_t length)
796 508351708 : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {
797 // UCA 9.0.0 uses a different table format from what this scanner expects.
798
2/4
✓ Branch 0 taken 254175871 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 254175871 times.
508351724 assert(cs_arg->uca == nullptr || cs_arg->uca->version != UCA_V900);
799 508351724 }
800
/* Number of code points scanned so far; see char_index below. */
801 1014477204 uint get_char_index() const { return char_index; }
802
/* Defined outside the class body.
   NOTE(review): presumably returns the next weight -- see definition. */
803 inline int next();
804
805 private:
806 /**
807 How many code points (possibly multibyte) we have scanned so far.
808 This includes code points with zero weight. Note that this is reset
809 once we get to the end of the string and restart the scanning for
810 the next weight level, but it is _not_ reset when we reach the
811 end of the last level.
812 */
813 uint char_index{0};
814
/* Copy of the functor passed to the constructor. */
815 const Mb_wc mb_wc;
816
817 inline int next_implicit(my_wc_t ch);
818 };
819
/*
  Scanner variant built on my_uca_scanner; the name and the assertion in
  uca_scanner_any suggest it handles the UCA 9.0.0 table format
  (NOTE(review): confirm against next()/next_raw()).
  Mb_wc is the type of the functor stored in mb_wc; LEVELS_FOR_COMPARE is
  a compile-time int (presumably the number of weight levels compared --
  confirm against its uses in the member definitions).
*/
820 template <class Mb_wc, int LEVELS_FOR_COMPARE>
821 class uca_scanner_900 : public my_uca_scanner {
822 public:
/* Forwards cs_arg/str/length to the base scanner and keeps a copy of the
   mb_wc functor. */
823 21643811440 uca_scanner_900(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg,
824 const uchar *str, size_t length)
825 21643811440 : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {}
826
/* Returns the next weight; per the for_each_weight() description below,
   iteration continues while the result is >= 0 and a weight of 0 marks a
   level separator. */
827 inline int next();
828
829 /**
830 For each weight in sequence, call "func", which should have
831 a function signature of "bool func(int weight, bool is_level_separator)".
832 Stops the iteration early if "func" returns false.
833
834 This is morally equivalent to
835
836 int weight;
837 while ((weight= next()) >= 0)
838 {
839 if (!func(weight, weight == 0)) break;
840 }
841
842 except that it might employ optimizations internally to speed up
843 the process. These optimizations will not modify the number of calls
844 to func() (or their order), but might affect the internal scanner
845 state during the calls, so func() should not try to read from
846 the scanner except by calling public member functions.
847
848 As a special optimization, if "bool preaccept_data(int num_weights)"
849 returns true, the next "num_weights" calls to func() _must_ return
850 true. This is so that bounds checking costs can be amortized
851 over fewer calls.
852 */
853 template <class T, class U>
854 inline void for_each_weight(T func, U preaccept_data);
855
856 private:
/* Copy of the functor passed to the constructor. */
857 const Mb_wc mb_wc;
858
/* Internal helpers; all are defined outside the class body. */
859 inline int next_raw();
860 inline int more_weight();
861 uint16 apply_case_first(uint16 weight);
862 uint16 apply_reorder_param(uint16 weight);
863 inline int next_implicit(my_wc_t ch);
864 void my_put_jamo_weights(my_wc_t *hangul_jamo, int jamo_cnt);
865 /*
866 apply_reorder_param() needs to return two weights for each origin
867 weight. This boolean signals whether we have already returned the
868 FB86 weight, and are ready to return the origin weight.
869 */
870 bool return_origin_weight{true};
871 /*
872 For Japanese kana-sensitive collation, we only add quaternary
873 weight for katakana and hiragana, but not for others like latin
874 and kanji, because characters like latin and kanji can be already
875 distinguished from kana by three levels of weight.
876 has_quaternary_weight is to indicate whether quaternary weight is
877 needed for characters in string.
878 */
879 bool has_quaternary_weight{false};
/* Quaternary-weight helpers for the Japanese case described above. */
880 int handle_ja_contraction_quat_wt();
881 int handle_ja_common_quat_wt(my_wc_t wc);
882 };
883
884 /********** Helper functions to handle contraction ************/
885
886 /**
887 Mark a code point as a contraction part
888
889 @param flags Pointer to UCA contraction flag data
890 @param wc Unicode code point
891 @param flag flag: "is contraction head", "is contraction tail"
892 */
893
894 546791 static inline void my_uca_add_contraction_flag(char *flags, my_wc_t wc,
895 int flag) {
/* Only the bits of wc selected by MY_UCA_CNT_FLAG_MASK index the table, so
   code points that agree in those bits share one flag slot. The flag is
   OR-ed in; bits already set in the slot are kept. */
896 546791 flags[wc & MY_UCA_CNT_FLAG_MASK] |= flag;
897 546791 }
898
899 /**
900 Check if UCA level data has contractions.
901
902 @param uca Pointer to UCA data
903
904 @return Whether the UCA data has contractions
905 @retval false - no contractions
906 @retval true - there are some contractions
907 */
908
909 37803780609 static inline bool my_uca_have_contractions(const MY_UCA_INFO *uca) {
/* Plain read of the have_contractions member; uca is dereferenced without
   a null check, so callers must pass a valid pointer. */
910 37803780609 return uca->have_contractions;
911 }
912
913 struct trie_node_cmp {
914 3021597 bool operator()(const MY_CONTRACTION &a, const my_wc_t b) { return a.ch < b; }
915 bool operator()(const MY_CONTRACTION &a, const MY_CONTRACTION &b) {
916 return a.ch < b.ch;
917 }
918 };
919
920 static std::vector<MY_CONTRACTION>::const_iterator
921 489459 find_contraction_part_in_trie(const std::vector<MY_CONTRACTION> &cont_nodes,
922 my_wc_t ch) {
923
2/2
✓ Branch 0 taken 21062 times.
✓ Branch 1 taken 468397 times.
489459 if (cont_nodes.empty()) return cont_nodes.end();
924 468397 return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch,
925 468397 trie_node_cmp());
926 }
927
928 549253 static std::vector<MY_CONTRACTION>::iterator find_contraction_part_in_trie(
929 std::vector<MY_CONTRACTION> &cont_nodes, my_wc_t ch) {
930
2/2
✓ Branch 0 taken 52185 times.
✓ Branch 1 taken 497068 times.
549253 if (cont_nodes.empty()) return cont_nodes.end();
931 497068 return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch,
932 497068 trie_node_cmp());
933 }
934 /**
935 Find a contraction consisting of two code points and return its weight array
936
937 @param cont_nodes Vector that contains contraction nodes
938 @param wc1 First code point
939 @param wc2 Second code point
940
941 @return Weight array
942 @retval NULL - no contraction found
943 @retval ptr - contraction weight array
944 */
945
946 199 const uint16 *my_uca_contraction2_weight(
947 const std::vector<MY_CONTRACTION> *cont_nodes, my_wc_t wc1, my_wc_t wc2) {
948
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 199 times.
199 if (!cont_nodes) return nullptr;
949
950
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 if (!cont_nodes->empty()) {
951 std::vector<MY_CONTRACTION>::const_iterator node_it1 =
952
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 find_contraction_part_in_trie(*cont_nodes, wc1);
953
3/6
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 199 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 199 times.
398 if (node_it1 == cont_nodes->end() || node_it1->ch != wc1) return nullptr;
954 std::vector<MY_CONTRACTION>::const_iterator node_it2 =
955
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 find_contraction_part_in_trie(node_it1->child_nodes, wc2);
956
3/6
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 199 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 199 times.
✗ Branch 5 not taken.
398 if (node_it2 != node_it1->child_nodes.end() && node_it2->ch == wc2 &&
957
1/2
✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
199 node_it2->is_contraction_tail)
958 199 return node_it2->weight;
959 }
960 return nullptr;
961 }
962
963 /**
964 Check if a code point can be previous context head
965
966 @param flags Pointer to UCA contraction flag data
967 @param wc Code point
968
969 @retval false - cannot be previous context head
970 @retval true - can be previous context head
971 */
972
973 27040 static inline bool my_uca_can_be_previous_context_head(const char *flags,
974 my_wc_t wc) {
975 27040 return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
976 }
977
978 /**
979 Check if a code point can be previous context tail
980
981 @param flags Pointer to UCA contraction flag data
982 @param wc Code point
983
984 @retval false - cannot be previous context tail
985 @retval true - can be previous context tail
986 */
987
988 182990785 static inline bool my_uca_can_be_previous_context_tail(const char *flags,
989 my_wc_t wc) {
990 182990785 return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
991 }
992
993 /**
994 Check if a string is a contraction of exactly the given length,
995 and return its weight array on success.
996
997 @param cont_nodes Vector that contains contraction nodes
998 @param wc Pointer to wide string
999 @param len String length
1000
1001 @return Weight array
1002 @retval NULL - Input string is not a known contraction
1003 @retval ptr - contraction weight array
1004 */
1005
1006 376988 static inline const uint16 *my_uca_contraction_weight(
1007 const std::vector<MY_CONTRACTION> *cont_nodes, const my_wc_t *wc,
1008 size_t len) {
1009
2/2
✓ Branch 0 taken 180420 times.
✓ Branch 1 taken 196568 times.
376988 if (!cont_nodes) return nullptr;
1010
1011 196568 std::vector<MY_CONTRACTION>::const_iterator node_it;
1012
2/2
✓ Branch 0 taken 270557 times.
✓ Branch 1 taken 20987 times.
291544 for (size_t ch_ind = 0; ch_ind < len; ++ch_ind) {
1013
1/2
✓ Branch 0 taken 270557 times.
✗ Branch 1 not taken.
270557 node_it = find_contraction_part_in_trie(*cont_nodes, wc[ch_ind]);
1014
6/6
✓ Branch 0 taken 200743 times.
✓ Branch 1 taken 69814 times.
✓ Branch 2 taken 105767 times.
✓ Branch 3 taken 94976 times.
✓ Branch 4 taken 175581 times.
✓ Branch 5 taken 94976 times.
270557 if (node_it == cont_nodes->end() || node_it->ch != wc[ch_ind])
1015 175581 return nullptr;
1016 94976 cont_nodes = &node_it->child_nodes;
1017 }
1018
2/2
✓ Branch 0 taken 20027 times.
✓ Branch 1 taken 960 times.
20987 if (node_it->is_contraction_tail) return node_it->weight;
1019 960 return nullptr;
1020 }
1021
1022 /**
1023 Return length of a 0-terminated wide string, analogous to strnlen().
1024
1025 @param s Pointer to wide string
1026 @param maxlen Maximum string length
1027
1028 @return string length, or maxlen if no '\0' is met.
1029 */
1030 2813640 static size_t my_wstrnlen(my_wc_t *s, size_t maxlen) {
1031
2/2
✓ Branch 0 taken 6517219 times.
✓ Branch 1 taken 1081 times.
6518300 for (size_t i = 0; i < maxlen; i++) {
1032
2/2
✓ Branch 0 taken 2812559 times.
✓ Branch 1 taken 3704660 times.
6517219 if (s[i] == 0) return i;
1033 }
1034 1081 return maxlen;
1035 }
1036
1037 /**
1038 Find a contraction in the input stream and return its weight array
1039
1040 Scan input code points to find a longest path in contraction trie
1041 which contains all these code points. If the ending node of this
1042 path is end of contraction, return the weight array.
1043
1044 @param wc0 The first code point of the contraction (which should have
1045 the MY_UCA_CNT_HEAD flag).
1046 @param[out] chars_skipped How many code points were skipped in the
1047 contraction we found. Only makes sense if we actually found one.
1048
1049 @return Weight array
1050 @retval NULL no contraction found
1051 @retval ptr contraction weight array
1052 */
1053
1054 201097 const uint16 *my_uca_scanner::contraction_find(my_wc_t wc0,
1055 size_t *chars_skipped) {
1056 201097 const uchar *beg = nullptr;
1057 201097 auto mb_wc = cs->cset->mb_wc;
1058
1059 201097 const uchar *s = sbeg;
1060 201097 const std::vector<MY_CONTRACTION> *cont_nodes = uca->contraction_nodes;
1061 201097 const MY_CONTRACTION *longest_contraction = nullptr;
1062 201097 std::vector<MY_CONTRACTION>::const_iterator node_it;
1063 for (;;) {
1064
1/2
✓ Branch 0 taken 216042 times.
✗ Branch 1 not taken.
216042 node_it = find_contraction_part_in_trie(*cont_nodes, wc0);
1065
6/6
✓ Branch 0 taken 26844 times.
✓ Branch 1 taken 189198 times.
✓ Branch 2 taken 6476 times.
✓ Branch 3 taken 20368 times.
✓ Branch 4 taken 195674 times.
✓ Branch 5 taken 20368 times.
216042 if (node_it == cont_nodes->end() || node_it->ch != wc0) break;
1066
2/2
✓ Branch 0 taken 3734 times.
✓ Branch 1 taken 16634 times.
20368 if (node_it->is_contraction_tail) {
1067 3734 longest_contraction = &(*node_it);
1068 3734 beg = s;
1069 3734 *chars_skipped = node_it->contraction_len - 1;
1070 }
1071 int mblen;
1072
3/4
✓ Branch 0 taken 20368 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5423 times.
✓ Branch 3 taken 14945 times.
20368 if ((mblen = mb_wc(cs, &wc0, s, send)) <= 0) break;
1073 14945 s += mblen;
1074 14945 cont_nodes = &node_it->child_nodes;
1075 14945 }
1076
1077
2/2
✓ Branch 0 taken 3509 times.
✓ Branch 1 taken 197588 times.
201097 if (longest_contraction != nullptr) {
1078 3509 const uint16 *cweight = longest_contraction->weight;
1079
2/2
✓ Branch 0 taken 890 times.
✓ Branch 1 taken 2619 times.
3509 if (uca->version == UCA_V900) {
1080 890 cweight += weight_lv;
1081 890 wbeg = cweight + MY_UCA_900_CE_SIZE;
1082 890 wbeg_stride = MY_UCA_900_CE_SIZE;
1083 890 num_of_ce_left = 7;
1084 } else {
1085 2619 wbeg = cweight + 1;
1086 2619 wbeg_stride = MY_UCA_900_CE_SIZE;
1087 }
1088 3509 sbeg = beg;
1089 3509 return cweight;
1090 }
1091 197588 return nullptr; /* No contractions were found */
1092 }
1093
1094 /**
1095 Find weight for contraction with previous context
1096 and return its weight array.
1097
1098 @param wc0 Previous code point
1099 @param wc1 Current code point
1100
1101 @return Weight array
1102 @retval NULL - no contraction with context found
1103 @retval ptr - contraction weight array
1104 */
1105 ALWAYS_INLINE
1106 const uint16 *my_uca_scanner::previous_context_find(my_wc_t wc0, my_wc_t wc1) {
1107 std::vector<MY_CONTRACTION>::const_iterator node_it1 =
1108 2462 find_contraction_part_in_trie(*uca->contraction_nodes, wc1);
1109
18/444
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1613 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✓ Branch 51 taken 1613 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 1613 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 639 times.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 639 times.
✗ Branch 64 not taken.
✓ Branch 65 taken 639 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✓ Branch 147 taken 56 times.
✗ Branch 148 not taken.
✓ Branch 149 taken 56 times.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✓ Branch 267 taken 57 times.
✗ Branch 268 not taken.
✓ Branch 269 taken 57 times.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✓ Branch 273 taken 57 times.
✗ Branch 274 not taken.
✓ Branch 275 taken 57 times.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✓ Branch 339 taken 40 times.
✗ Branch 340 not taken.
✓ Branch 341 taken 40 times.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
2462 if (node_it1 == uca->contraction_nodes->end() || node_it1->ch != wc1)
1110 return nullptr;
1111 std::vector<MY_CONTRACTION>::const_iterator node_it2 =
1112
6/148
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 40 times.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
2462 find_contraction_part_in_trie(node_it1->child_nodes_context, wc0);
1113
18/444
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1613 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 1613 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 1613 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 639 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 639 times.
✗ Branch 63 not taken.
✓ Branch 64 taken 639 times.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✓ Branch 146 taken 56 times.
✗ Branch 147 not taken.
✓ Branch 148 taken 56 times.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✓ Branch 266 taken 57 times.
✗ Branch 267 not taken.
✓ Branch 268 taken 57 times.
✗ Branch 269 not taken.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✓ Branch 272 taken 57 times.
✗ Branch 273 not taken.
✓ Branch 274 taken 57 times.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✓ Branch 338 taken 40 times.
✗ Branch 339 not taken.
✓ Branch 340 taken 40 times.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
2462 if (node_it2 != node_it1->child_nodes_context.end() && node_it2->ch == wc0) {
1114
6/148
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✓ Branch 113 taken 40 times.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
2462 if (uca->version == UCA_V900) {
1115 2422 wbeg = node_it2->weight + MY_UCA_900_CE_SIZE + weight_lv;
1116 2422 wbeg_stride = MY_UCA_900_CE_SIZE;
1117 2422 num_of_ce_left = 7;
1118 } else {
1119 40 wbeg = node_it2->weight + 1;
1120 40 wbeg_stride = MY_UCA_900_CE_SIZE;
1121 }
1122 2462 return node_it2->weight + weight_lv;
1123 }
1124 return nullptr;
1125 }
1126
1127 /****************************************************************/
1128 #define HANGUL_JAMO_MAX_LENGTH 3
1129 /**
1130 Check if a code point is a Hangul syllable and, if it is, decompose it
1131 into its constituent jamos.
1132
1133 @param syllable Hangul syllable to be decomposed
1134 @param[out] jamo Corresponding jamos
1135
1136 @return 0 The code point is not Hangul syllable
1137 or cannot be decomposed
1138 others The number of jamos returned
1139 */
1140 408141570 static int my_decompose_hangul_syllable(my_wc_t syllable, my_wc_t *jamo) {
1141
4/4
✓ Branch 0 taken 398320029 times.
✓ Branch 1 taken 9821541 times.
✓ Branch 2 taken 394125971 times.
✓ Branch 3 taken 4194058 times.
408141570 if (syllable < 0xAC00 || syllable > 0xD7AF) return 0;
1142 4194058 constexpr uint syllable_base = 0xAC00;
1143 4194058 constexpr uint leadingjamo_base = 0x1100;
1144 4194058 constexpr uint voweljamo_base = 0x1161;
1145 4194058 constexpr uint trailingjamo_base = 0x11A7;
1146 4194058 constexpr uint voweljamo_cnt = 21;
1147 4194058 constexpr uint trailingjamo_cnt = 28;
1148 4194058 const uint syllable_index = syllable - syllable_base;
1149 4194058 const uint v_t_combination = voweljamo_cnt * trailingjamo_cnt;
1150 4194058 const uint leadingjamo_index = syllable_index / v_t_combination;
1151 4194058 const uint voweljamo_index =
1152 4194058 (syllable_index % v_t_combination) / trailingjamo_cnt;
1153 4194058 const uint trailingjamo_index = syllable_index % trailingjamo_cnt;
1154 4194058 jamo[0] = leadingjamo_base + leadingjamo_index;
1155 4194058 jamo[1] = voweljamo_base + voweljamo_index;
1156
2/2
✓ Branch 0 taken 4043934 times.
✓ Branch 1 taken 150124 times.
4194058 jamo[2] = trailingjamo_index ? (trailingjamo_base + trailingjamo_index) : 0;
1157
2/2
✓ Branch 0 taken 4043934 times.
✓ Branch 1 taken 150124 times.
4194058 return trailingjamo_index ? 3 : 2;
1158 }
1159
1160 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1161 8388116 void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::my_put_jamo_weights(
1162 my_wc_t *hangul_jamo, int jamo_cnt) {
1163
2/2
✓ Branch 0 taken 12432050 times.
✓ Branch 1 taken 4194058 times.
33252216 for (int jamoind = 0; jamoind < jamo_cnt; jamoind++) {
1164 24864100 uint16 *implicit_weight = implicit + jamoind * MY_UCA_900_CE_SIZE;
1165 24864100 uint page = hangul_jamo[jamoind] >> 8;
1166 24864100 uint code = hangul_jamo[jamoind] & 0xFF;
1167 24864100 const uint16 *jamo_weight_page = uca->weights[page];
1168 24864100 implicit_weight[0] = UCA900_WEIGHT(jamo_weight_page, 0, code);
1169 24864100 implicit_weight[1] = UCA900_WEIGHT(jamo_weight_page, 1, code);
1170 24864100 implicit_weight[2] = UCA900_WEIGHT(jamo_weight_page, 2, code);
1171 }
1172 8388116 implicit[9] = jamo_cnt;
1173 }
1174
1175 /*
1176 Chinese Han characters are assigned an implicit weight according to the
1177 Unicode Collation Algorithm. But when creating our Chinese collation for
1178 utf8mb4, to implement this language's reorder rule, we give the Han
1179 characters in CLDR zh.xml file weight values from 0x1C47 to 0xBDBE, and let
1180 the other Han characters keep their implicit weight. Per UCA, the smallest
1181 leading primary weight of the implicit weight is 0xFB00, and the largest
1182 primary weight we occupy for the Han characters in zh.xml is 0xBDBE. There is
1183 a huge gap between these two weight values. To use this weight value gap and
1184 let the character groups like Latin, Cyrillic, have a single primary weight as
1185 before reordering, we change the leading primary weight of the implicit weight
1186 as below.
1187 */
1188 10147731 static uint16 change_zh_implicit(uint16 weight) {
1189
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10147731 times.
10147731 assert(weight >= 0xFB00);
1190
6/7
✓ Branch 0 taken 62409 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1884 times.
✓ Branch 3 taken 471 times.
✓ Branch 4 taken 10519 times.
✓ Branch 5 taken 51845 times.
✓ Branch 6 taken 10020603 times.
10147731 switch (weight) {
1191 62409 case 0xFB00:
1192 62409 return 0xF621;
1193 case 0xFB40:
1194 return 0xBDBF;
1195 1884 case 0xFB41:
1196 1884 return 0xBDC0;
1197 471 case 0xFB80:
1198 471 return 0xBDC1;
1199 10519 case 0xFB84:
1200 10519 return 0xBDC2;
1201 51845 case 0xFB85:
1202 51845 return 0xBDC3;
1203 10020603 default:
1204 10020603 return weight + 0xF622 - 0xFBC0;
1205 }
1206 }
1207
1208 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1209 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_implicit(
1210 my_wc_t ch) {
1211 my_wc_t hangul_jamo[HANGUL_JAMO_MAX_LENGTH];
1212 int jamo_cnt;
1213
21/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 99072 times.
✓ Branch 17 taken 9377766 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 2972160 times.
✓ Branch 21 taken 286125393 times.
✓ Branch 22 taken 99072 times.
✓ Branch 23 taken 9566649 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 66054 times.
✓ Branch 27 taken 6377946 times.
✓ Branch 28 taken 924672 times.
✓ Branch 29 taken 89284608 times.
✓ Branch 30 taken 33024 times.
✓ Branch 31 taken 3195748 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 26 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 3 times.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 117 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 1 times.
✓ Branch 107 taken 8034 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10829 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
408141570 if ((jamo_cnt = my_decompose_hangul_syllable(ch, hangul_jamo))) {
1214 4194058 my_put_jamo_weights(hangul_jamo, jamo_cnt);
1215 4194058 num_of_ce_left = jamo_cnt - 1;
1216 4194058 wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv;
1217 4194058 wbeg_stride = MY_UCA_900_CE_SIZE;
1218 4194058 return *(implicit + weight_lv);
1219 }
1220
1221 /*
1222 We give the Chinese collation different leading primary weight to make
1223 sure there are enough single weight values to be assigned to character
1224 groups like Latin, Cyrillic, etc.
1225 */
1226 uint page;
1227
41/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 9105408 times.
✓ Branch 33 taken 272358 times.
✓ Branch 34 taken 62208 times.
✓ Branch 35 taken 9043200 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 272736543 times.
✓ Branch 41 taken 13388850 times.
✓ Branch 42 taken 1866441 times.
✓ Branch 43 taken 270870102 times.
✓ Branch 44 taken 9105489 times.
✓ Branch 45 taken 461160 times.
✓ Branch 46 taken 62208 times.
✓ Branch 47 taken 9043281 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 6070326 times.
✓ Branch 53 taken 307620 times.
✓ Branch 54 taken 41472 times.
✓ Branch 55 taken 6028854 times.
✓ Branch 56 taken 84983808 times.
✓ Branch 57 taken 4300800 times.
✓ Branch 58 taken 580608 times.
✓ Branch 59 taken 84403200 times.
✓ Branch 60 taken 3041515 times.
✓ Branch 61 taken 154233 times.
✓ Branch 62 taken 20736 times.
✓ Branch 63 taken 3020779 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 2 times.
✓ Branch 125 taken 24 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 2 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 139 times.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✓ Branch 191 taken 139 times.
✓ Branch 192 taken 119 times.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✓ Branch 195 taken 119 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✓ Branch 200 taken 138 times.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✓ Branch 203 taken 138 times.
✓ Branch 204 taken 117 times.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✓ Branch 207 taken 117 times.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 4328 times.
✓ Branch 213 taken 3706 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 4328 times.
✓ Branch 216 taken 7191 times.
✓ Branch 217 taken 3638 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 7191 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
403947512 if (ch >= 0x17000 && ch <= 0x18AFF) // Tangut character
1228 {
1229 2633673 page = 0xFB00;
1230 2633673 implicit[3] = (ch - 0x17000) | 0x8000;
1231 } else {
1232 401313839 page = ch >> 15;
1233 401313839 implicit[3] = (ch & 0x7FFF) | 0x8000;
1234
106/560
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 9315558 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 9257472 times.
✓ Branch 83 taken 58086 times.
✓ Branch 84 taken 8835840 times.
✓ Branch 85 taken 421632 times.
✓ Branch 86 taken 8451441 times.
✓ Branch 87 taken 384399 times.
✓ Branch 88 taken 8451072 times.
✓ Branch 89 taken 422001 times.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 284258952 times.
✗ Branch 101 not taken.
✓ Branch 102 taken 282588246 times.
✓ Branch 103 taken 1670706 times.
✓ Branch 104 taken 264649302 times.
✓ Branch 105 taken 17938944 times.
✓ Branch 106 taken 253501731 times.
✓ Branch 107 taken 11147571 times.
✓ Branch 108 taken 253491030 times.
✓ Branch 109 taken 17949645 times.
✓ Branch 110 taken 9504441 times.
✗ Branch 111 not taken.
✓ Branch 112 taken 9446841 times.
✓ Branch 113 taken 57600 times.
✓ Branch 114 taken 8835897 times.
✓ Branch 115 taken 610944 times.
✓ Branch 116 taken 8451498 times.
✓ Branch 117 taken 384399 times.
✓ Branch 118 taken 8451129 times.
✓ Branch 119 taken 611313 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✓ Branch 130 taken 6336474 times.
✗ Branch 131 not taken.
✓ Branch 132 taken 6298074 times.
✓ Branch 133 taken 38400 times.
✓ Branch 134 taken 5890598 times.
✓ Branch 135 taken 407476 times.
✓ Branch 136 taken 5634332 times.
✓ Branch 137 taken 256266 times.
✓ Branch 138 taken 5634086 times.
✓ Branch 139 taken 407722 times.
✓ Branch 140 taken 88704000 times.
✗ Branch 141 not taken.
✓ Branch 142 taken 88166400 times.
✓ Branch 143 taken 537600 times.
✓ Branch 144 taken 82467840 times.
✓ Branch 145 taken 5698560 times.
✓ Branch 146 taken 78880116 times.
✓ Branch 147 taken 3587724 times.
✓ Branch 148 taken 78876672 times.
✓ Branch 149 taken 5702004 times.
✓ Branch 150 taken 3175012 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 3155810 times.
✓ Branch 153 taken 19202 times.
✓ Branch 154 taken 2951651 times.
✓ Branch 155 taken 204159 times.
✓ Branch 156 taken 2823515 times.
✓ Branch 157 taken 128136 times.
✓ Branch 158 taken 2823392 times.
✓ Branch 159 taken 204282 times.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✓ Branch 310 taken 26 times.
✗ Branch 311 not taken.
✓ Branch 312 taken 26 times.
✗ Branch 313 not taken.
✓ Branch 314 taken 2 times.
✓ Branch 315 taken 24 times.
✗ Branch 316 not taken.
✓ Branch 317 taken 2 times.
✗ Branch 318 not taken.
✓ Branch 319 taken 24 times.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
✗ Branch 448 not taken.
✗ Branch 449 not taken.
✗ Branch 450 not taken.
✗ Branch 451 not taken.
✗ Branch 452 not taken.
✗ Branch 453 not taken.
✗ Branch 454 not taken.
✗ Branch 455 not taken.
✗ Branch 456 not taken.
✗ Branch 457 not taken.
✗ Branch 458 not taken.
✗ Branch 459 not taken.
✗ Branch 460 not taken.
✗ Branch 461 not taken.
✗ Branch 462 not taken.
✗ Branch 463 not taken.
✗ Branch 464 not taken.
✗ Branch 465 not taken.
✗ Branch 466 not taken.
✗ Branch 467 not taken.
✗ Branch 468 not taken.
✗ Branch 469 not taken.
✓ Branch 470 taken 139 times.
✗ Branch 471 not taken.
✓ Branch 472 taken 139 times.
✗ Branch 473 not taken.
✓ Branch 474 taken 100 times.
✓ Branch 475 taken 39 times.
✓ Branch 476 taken 100 times.
✗ Branch 477 not taken.
✓ Branch 478 taken 100 times.
✓ Branch 479 taken 39 times.
✓ Branch 480 taken 119 times.
✗ Branch 481 not taken.
✓ Branch 482 taken 119 times.
✗ Branch 483 not taken.
✓ Branch 484 taken 94 times.
✓ Branch 485 taken 25 times.
✓ Branch 486 taken 94 times.
✗ Branch 487 not taken.
✓ Branch 488 taken 94 times.
✓ Branch 489 taken 25 times.
✗ Branch 490 not taken.
✗ Branch 491 not taken.
✗ Branch 492 not taken.
✗ Branch 493 not taken.
✗ Branch 494 not taken.
✗ Branch 495 not taken.
✗ Branch 496 not taken.
✗ Branch 497 not taken.
✗ Branch 498 not taken.
✗ Branch 499 not taken.
✓ Branch 500 taken 138 times.
✗ Branch 501 not taken.
✓ Branch 502 taken 138 times.
✗ Branch 503 not taken.
✓ Branch 504 taken 99 times.
✓ Branch 505 taken 39 times.
✓ Branch 506 taken 99 times.
✗ Branch 507 not taken.
✓ Branch 508 taken 99 times.
✓ Branch 509 taken 39 times.
✓ Branch 510 taken 117 times.
✗ Branch 511 not taken.
✓ Branch 512 taken 117 times.
✗ Branch 513 not taken.
✓ Branch 514 taken 93 times.
✓ Branch 515 taken 24 times.
✓ Branch 516 taken 93 times.
✗ Branch 517 not taken.
✓ Branch 518 taken 93 times.
✓ Branch 519 taken 24 times.
✗ Branch 520 not taken.
✗ Branch 521 not taken.
✗ Branch 522 not taken.
✗ Branch 523 not taken.
✗ Branch 524 not taken.
✗ Branch 525 not taken.
✗ Branch 526 not taken.
✗ Branch 527 not taken.
✗ Branch 528 not taken.
✗ Branch 529 not taken.
✓ Branch 530 taken 8034 times.
✗ Branch 531 not taken.
✓ Branch 532 taken 7785 times.
✓ Branch 533 taken 249 times.
✓ Branch 534 taken 4289 times.
✓ Branch 535 taken 3496 times.
✓ Branch 536 taken 3637 times.
✓ Branch 537 taken 652 times.
✓ Branch 538 taken 3637 times.
✓ Branch 539 taken 3496 times.
✓ Branch 540 taken 10829 times.
✗ Branch 541 not taken.
✓ Branch 542 taken 10631 times.
✓ Branch 543 taken 198 times.
✓ Branch 544 taken 7167 times.
✓ Branch 545 taken 3464 times.
✓ Branch 546 taken 6541 times.
✓ Branch 547 taken 626 times.
✓ Branch 548 taken 6541 times.
✓ Branch 549 taken 3464 times.
✗ Branch 550 not taken.
✗ Branch 551 not taken.
✗ Branch 552 not taken.
✗ Branch 553 not taken.
✗ Branch 554 not taken.
✗ Branch 555 not taken.
✗ Branch 556 not taken.
✗ Branch 557 not taken.
✗ Branch 558 not taken.
✗ Branch 559 not taken.
401313839 if ((ch >= 0x3400 && ch <= 0x4DB5) || (ch >= 0x20000 && ch <= 0x2A6D6) ||
1235
92/448
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 8413731 times.
✓ Branch 65 taken 37341 times.
✓ Branch 66 taken 8413632 times.
✓ Branch 67 taken 422100 times.
✓ Branch 68 taken 8411634 times.
✓ Branch 69 taken 1998 times.
✓ Branch 70 taken 8411616 times.
✓ Branch 71 taken 422118 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 252398448 times.
✓ Branch 81 taken 1092582 times.
✓ Branch 82 taken 252395478 times.
✓ Branch 83 taken 17952615 times.
✓ Branch 84 taken 252335808 times.
✓ Branch 85 taken 59670 times.
✓ Branch 86 taken 252335286 times.
✓ Branch 87 taken 17953137 times.
✓ Branch 88 taken 8413788 times.
✓ Branch 89 taken 37341 times.
✓ Branch 90 taken 8413689 times.
✓ Branch 91 taken 611412 times.
✓ Branch 92 taken 8411667 times.
✓ Branch 93 taken 2022 times.
✓ Branch 94 taken 8411649 times.
✓ Branch 95 taken 611430 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 5609192 times.
✓ Branch 105 taken 24894 times.
✓ Branch 106 taken 5609126 times.
✓ Branch 107 taken 407788 times.
✓ Branch 108 taken 5607778 times.
✓ Branch 109 taken 1348 times.
✓ Branch 110 taken 5607766 times.
✓ Branch 111 taken 407800 times.
✓ Branch 112 taken 78528156 times.
✓ Branch 113 taken 348516 times.
✓ Branch 114 taken 78527232 times.
✓ Branch 115 taken 5702928 times.
✓ Branch 116 taken 78508584 times.
✓ Branch 117 taken 18648 times.
✓ Branch 118 taken 78508416 times.
✓ Branch 119 taken 5703096 times.
✓ Branch 120 taken 2810945 times.
✓ Branch 121 taken 12447 times.
✓ Branch 122 taken 2810912 times.
✓ Branch 123 taken 204315 times.
✓ Branch 124 taken 2810238 times.
✓ Branch 125 taken 674 times.
✓ Branch 126 taken 2810232 times.
✓ Branch 127 taken 204321 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 24 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✓ Branch 255 taken 24 times.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 100 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 100 times.
✓ Branch 379 taken 39 times.
✓ Branch 380 taken 56 times.
✓ Branch 381 taken 44 times.
✓ Branch 382 taken 56 times.
✓ Branch 383 taken 39 times.
✓ Branch 384 taken 94 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 94 times.
✓ Branch 387 taken 25 times.
✓ Branch 388 taken 61 times.
✓ Branch 389 taken 33 times.
✓ Branch 390 taken 61 times.
✓ Branch 391 taken 25 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 99 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 99 times.
✓ Branch 403 taken 39 times.
✓ Branch 404 taken 55 times.
✓ Branch 405 taken 44 times.
✓ Branch 406 taken 55 times.
✓ Branch 407 taken 39 times.
✓ Branch 408 taken 93 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 93 times.
✓ Branch 411 taken 24 times.
✓ Branch 412 taken 60 times.
✓ Branch 413 taken 33 times.
✓ Branch 414 taken 60 times.
✓ Branch 415 taken 24 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 3637 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3637 times.
✓ Branch 427 taken 3496 times.
✓ Branch 428 taken 3593 times.
✓ Branch 429 taken 44 times.
✓ Branch 430 taken 3593 times.
✓ Branch 431 taken 3496 times.
✓ Branch 432 taken 6541 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6541 times.
✓ Branch 435 taken 3464 times.
✓ Branch 436 taken 6508 times.
✓ Branch 437 taken 33 times.
✓ Branch 438 taken 6508 times.
✓ Branch 439 taken 3464 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
383042023 (ch >= 0x2A700 && ch <= 0x2B734) || (ch >= 0x2B740 && ch <= 0x2B81D) ||
1236
24/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 51858 times.
✓ Branch 17 taken 8359758 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 1543050 times.
✓ Branch 21 taken 250792236 times.
✓ Branch 22 taken 51882 times.
✓ Branch 23 taken 8359767 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 34588 times.
✓ Branch 27 taken 5573178 times.
✓ Branch 28 taken 484008 times.
✓ Branch 29 taken 78024408 times.
✓ Branch 30 taken 17294 times.
✓ Branch 31 taken 2792938 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 31 times.
✓ Branch 95 taken 25 times.
✓ Branch 96 taken 44 times.
✓ Branch 97 taken 17 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 31 times.
✓ Branch 101 taken 24 times.
✓ Branch 102 taken 44 times.
✓ Branch 103 taken 16 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 31 times.
✓ Branch 107 taken 3562 times.
✓ Branch 108 taken 44 times.
✓ Branch 109 taken 6464 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
356095298 (ch >= 0x2B820 && ch <= 0x2CEA1)) {
1237 22092433 page += 0xFB80;
1238
63/448
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 8781876 times.
✗ Branch 65 not taken.
✓ Branch 66 taken 8781876 times.
✗ Branch 67 not taken.
✓ Branch 68 taken 8724276 times.
✓ Branch 69 taken 57600 times.
✗ Branch 70 not taken.
✓ Branch 71 taken 8724276 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 268745373 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 263465973 times.
✓ Branch 83 taken 5279400 times.
✓ Branch 84 taken 261727389 times.
✓ Branch 85 taken 1738584 times.
✗ Branch 86 not taken.
✓ Branch 87 taken 261727389 times.
✓ Branch 88 taken 8971197 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 8782287 times.
✓ Branch 91 taken 188910 times.
✓ Branch 92 taken 8724309 times.
✓ Branch 93 taken 57978 times.
✗ Branch 94 not taken.
✓ Branch 95 taken 8724309 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 5980978 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 5854858 times.
✓ Branch 107 taken 126120 times.
✓ Branch 108 taken 5816206 times.
✓ Branch 109 taken 38652 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 5816206 times.
✓ Branch 112 taken 83727504 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 81967704 times.
✓ Branch 115 taken 1759800 times.
✓ Branch 116 taken 81426576 times.
✓ Branch 117 taken 541128 times.
✗ Branch 118 not taken.
✓ Branch 119 taken 81426576 times.
✓ Branch 120 taken 2997259 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 2933778 times.
✓ Branch 123 taken 63481 times.
✓ Branch 124 taken 2914452 times.
✓ Branch 125 taken 19326 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 2914452 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✓ Branch 248 taken 24 times.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 24 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 64 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 64 times.
✗ Branch 379 not taken.
✓ Branch 380 taken 64 times.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✓ Branch 383 taken 64 times.
✓ Branch 384 taken 42 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 42 times.
✗ Branch 387 not taken.
✓ Branch 388 taken 42 times.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✓ Branch 391 taken 42 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 63 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 63 times.
✗ Branch 403 not taken.
✓ Branch 404 taken 63 times.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✓ Branch 407 taken 63 times.
✓ Branch 408 taken 40 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 40 times.
✗ Branch 411 not taken.
✓ Branch 412 taken 40 times.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✓ Branch 415 taken 40 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 7058 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3601 times.
✓ Branch 427 taken 3457 times.
✓ Branch 428 taken 3601 times.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✓ Branch 431 taken 3601 times.
✓ Branch 432 taken 9928 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6488 times.
✓ Branch 435 taken 3440 times.
✓ Branch 436 taken 6488 times.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✓ Branch 439 taken 6488 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
379221406 } else if ((ch >= 0x4E00 && ch <= 0x9FD5) || (ch >= 0xFA0E && ch <= 0xFA29))
1239 7424632 page += 0xFB40;
1240 else
1241 371796774 page += 0xFBC0;
1242 }
1243
14/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 9377766 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 8893983 times.
✓ Branch 21 taken 277231410 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 9566649 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6377946 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 89284608 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 3195748 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 26 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 117 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 8034 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10829 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
403947512 if (cs->coll_param == &zh_coll_param) {
1244 8893983 page = change_zh_implicit(page);
1245 }
1246 403947512 implicit[0] = page;
1247 403947512 implicit[1] = 0x0020;
1248 403947512 implicit[2] = 0x0002;
1249 // implicit[3] is set above.
1250 403947512 implicit[4] = 0;
1251 403947512 implicit[5] = 0;
1252 403947512 num_of_ce_left = 1;
1253 403947512 wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv;
1254 403947512 wbeg_stride = MY_UCA_900_CE_SIZE;
1255
1256 403947512 return *(implicit + weight_lv);
1257 }
1258
1259 /**
1260 Return implicit UCA weight
1261 Used for code points that do not have assigned UCA weights.
1262
1263 @return The leading implicit weight.
1264 */
1265
1266 template <class Mb_wc>
1267 ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next_implicit(my_wc_t ch) {
1268 30495286 implicit[0] = (ch & 0x7FFF) | 0x8000;
1269 30495286 implicit[1] = 0;
1270 30495286 wbeg = implicit;
1271 30495286 wbeg_stride = MY_UCA_900_CE_SIZE;
1272
1273 30495286 uint page = ch >> 15;
1274
1275
10/36
✓ Branch 0 taken 1862400 times.
✓ Branch 1 taken 21439551 times.
✓ Branch 2 taken 460800 times.
✓ Branch 3 taken 6022659 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 372 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 357 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 12800 times.
✓ Branch 17 taken 76815 times.
✓ Branch 18 taken 12800 times.
✓ Branch 19 taken 76812 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
29965366 if (ch >= 0x3400 && ch <= 0x4DB5)
1276 2348800 page += 0xFB80;
1277
18/72
✓ Branch 0 taken 21439551 times.
✓ Branch 1 taken 423936 times.
✓ Branch 2 taken 6137832 times.
✓ Branch 3 taken 15301719 times.
✓ Branch 4 taken 6022659 times.
✓ Branch 5 taken 105984 times.
✓ Branch 6 taken 1504944 times.
✓ Branch 7 taken 4517715 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 372 times.
✓ Branch 20 taken 357 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 357 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 76815 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 41804 times.
✓ Branch 35 taken 35011 times.
✓ Branch 36 taken 76812 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 41804 times.
✓ Branch 39 taken 35008 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
28146486 else if (ch >= 0x4E00 && ch <= 0x9FA5)
1278 7726384 page += 0xFB40;
1279 else
1280 20420102 page += 0xFBC0;
1281
1282 30495286 return page;
1283 }
1284
1285 template <class Mb_wc>
1286 ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next() {
1287 /*
1288 Check if the weights for the previous code point have been
1289 already fully scanned. If yes, then get the next code point and
1290 initialize wbeg and wlength to its weight string.
1291 */
1292
1293
16/28
✓ Branch 0 taken 25421620 times.
✓ Branch 1 taken 347255757 times.
✓ Branch 2 taken 7002842 times.
✓ Branch 3 taken 160781076 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 102026 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26946 times.
✓ Branch 8 taken 4312 times.
✓ Branch 9 taken 7436495 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 368 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1428 times.
✓ Branch 14 taken 101974 times.
✓ Branch 15 taken 290308 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 8 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 20 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 98 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 84 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
548425362 if (wbeg[0]) /* More weights left from the previous step: */
1294 32635856 return *wbeg++; /* return the next weight from expansion */
1295
1296 do {
1297 523830696 my_wc_t wc = 0;
1298
1299 /* Get next code point */
1300
23/36
✓ Branch 0 taken 347425992 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 80229898 times.
✓ Branch 3 taken 80586689 times.
✓ Branch 4 taken 102026 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 648 times.
✓ Branch 7 taken 26298 times.
✓ Branch 8 taken 7421784 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 7436921 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 370 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1428 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141298 times.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 136936 times.
✓ Branch 19 taken 155155 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 100 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 98 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
523830696 int mblen = mb_wc(&wc, sbeg, send);
1301
31/36
✓ Branch 0 taken 173831771 times.
✓ Branch 1 taken 173594221 times.
✓ Branch 2 taken 80672266 times.
✓ Branch 3 taken 80144321 times.
✓ Branch 4 taken 2136 times.
✓ Branch 5 taken 99890 times.
✓ Branch 6 taken 648 times.
✓ Branch 7 taken 26298 times.
✓ Branch 8 taken 113777 times.
✓ Branch 9 taken 7308007 times.
✓ Branch 10 taken 113591 times.
✓ Branch 11 taken 7323330 times.
✓ Branch 12 taken 232 times.
✓ Branch 13 taken 138 times.
✓ Branch 14 taken 418 times.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 141298 times.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 136936 times.
✓ Branch 19 taken 155155 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✓ Branch 25 taken 81 times.
✓ Branch 26 taken 19 times.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
523816178 if (mblen <= 0) {
1302 255013196 ++weight_lv;
1303
19/36
✓ Branch 0 taken 173389403 times.
✓ Branch 1 taken 442368 times.
✓ Branch 2 taken 80229898 times.
✓ Branch 3 taken 442368 times.
✓ Branch 4 taken 2136 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 648 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 113778 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 112565 times.
✓ Branch 11 taken 1026 times.
✓ Branch 12 taken 232 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 418 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141298 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 136936 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 5 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 19 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✗ Branch 31 not taken.
✓ Branch 32 taken 30 times.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
503452146 if (sbeg >= send) return -1; /* No more bytes, end of line reached */
1304 /*
1305 There are some more bytes left. Non-positive mb_len means that
1306 we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
1307 */
1308
3/36
✗ Branch 0 not taken.
✓ Branch 1 taken 442368 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 442368 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1026 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
885761 if ((sbeg += cs->mbminlen) > send) {
1309 /* For safety purposes don't go beyond the string range. */
1310 sbeg = send;
1311 }
1312 /*
1313 Treat every complete or incomplete mbminlen unit as a weight which is
1314 greater than weight for any possible normal character.
1315 0xFFFF is greater than any possible weight in the UCA weight table.
1316 */
1317 885761 return 0xFFFF;
1318 }
1319
1320 268802982 sbeg += mblen;
1321 268802982 char_index++;
1322
18/36
✓ Branch 0 taken 144703492 times.
✓ Branch 1 taken 28890729 times.
✓ Branch 2 taken 72351751 times.
✓ Branch 3 taken 7792570 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 99890 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26298 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 7308007 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7323330 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 155154 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 81 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
268802982 if (wc > uca->maxchar) {
1323 /* Return 0xFFFD as weight for all characters outside BMP */
1324 217055244 wbeg = nochar;
1325 217055244 wbeg_stride = 0;
1326 217055244 return 0xFFFD;
1327 }
1328
1329
24/36
✓ Branch 0 taken 4798552 times.
✓ Branch 1 taken 24092177 times.
✓ Branch 2 taken 1161710 times.
✓ Branch 3 taken 6630860 times.
✓ Branch 4 taken 35488 times.
✓ Branch 5 taken 64402 times.
✓ Branch 6 taken 8913 times.
✓ Branch 7 taken 17385 times.
✓ Branch 8 taken 4441 times.
✓ Branch 9 taken 7309177 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7308254 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 4263 times.
✓ Branch 17 taken 146077 times.
✓ Branch 18 taken 4468 times.
✓ Branch 19 taken 150686 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✓ Branch 26 taken 2 times.
✓ Branch 27 taken 77 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
51747738 if (my_uca_have_contractions(uca)) {
1330 const uint16 *cweight;
1331 /*
1332 If we have scanned a code point which can have previous context,
1333 and there were some more code point already before,
1334 then verify that {prev_char, wc} together form
1335 a real previous context pair.
1336 Note, we support only 2-character long sequences with previous
1337 context at the moment. CLDR does not have longer sequences.
1338 */
1339 6004265 if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) &&
1340
4/72
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
120 wbeg != nochar && /* if not the very first character */
1341 56 my_uca_can_be_previous_context_head(uca->contraction_flags,
1342
22/72
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 4798488 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 4798512 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1161710 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 1161710 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 35488 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 35488 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 8913 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 8913 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 14030 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 14030 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 14836 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 14836 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✓ Branch 33 taken 4263 times.
✗ Branch 34 not taken.
✓ Branch 35 taken 4263 times.
✗ Branch 36 not taken.
✓ Branch 37 taken 4468 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 4468 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2 times.
✗ Branch 50 not taken.
✓ Branch 51 taken 2 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 2 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 2 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
6042328 prev_char) &&
1343
2/72
✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
80 (cweight = previous_context_find(prev_char, wc))) {
1344 40 prev_char = 0; /* Clear for the next character */
1345 40 return *cweight;
1346
18/36
✓ Branch 0 taken 5210 times.
✓ Branch 1 taken 4793302 times.
✓ Branch 2 taken 1050 times.
✓ Branch 3 taken 1160660 times.
✓ Branch 4 taken 1280 times.
✓ Branch 5 taken 34208 times.
✓ Branch 6 taken 320 times.
✓ Branch 7 taken 8593 times.
✓ Branch 8 taken 1425 times.
✓ Branch 9 taken 12605 times.
✓ Branch 10 taken 1159 times.
✓ Branch 11 taken 13677 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 540 times.
✓ Branch 17 taken 3723 times.
✓ Branch 18 taken 421 times.
✓ Branch 19 taken 4047 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 2 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 2 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
6042224 } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) {
1347 /* Check if wc starts a contraction */
1348 size_t chars_skipped;
1349
24/72
✓ Branch 0 taken 5210 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1007 times.
✓ Branch 3 taken 4203 times.
✓ Branch 4 taken 1050 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 36 times.
✓ Branch 7 taken 1014 times.
✓ Branch 8 taken 1280 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 244 times.
✓ Branch 11 taken 1036 times.
✓ Branch 12 taken 320 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 61 times.
✓ Branch 15 taken 259 times.
✓ Branch 16 taken 1425 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 508 times.
✓ Branch 19 taken 917 times.
✓ Branch 20 taken 1159 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 583 times.
✓ Branch 23 taken 576 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 540 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 59 times.
✓ Branch 35 taken 481 times.
✓ Branch 36 taken 421 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 121 times.
✓ Branch 39 taken 300 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
11405 if ((cweight = contraction_find(wc, &chars_skipped))) {
1350 2619 char_index += chars_skipped;
1351 2619 return *cweight;
1352 }
1353 }
1354 6039605 prev_char = wc;
1355 }
1356
1357 /* Process single code point */
1358 51760039 uint page = wc >> 8;
1359 51760039 uint code = wc & 0xFF;
1360
1361 /* If weight page for wc does not exist, then calculate algorithmically */
1362 51760039 const uint16 *wpage = uca->weights[page];
1363
29/72
✓ Branch 0 taken 23725887 times.
✓ Branch 1 taken 5163795 times.
✓ Branch 2 taken 23301951 times.
✓ Branch 3 taken 423936 times.
✓ Branch 4 taken 6589443 times.
✓ Branch 5 taken 1203091 times.
✓ Branch 6 taken 6483459 times.
✓ Branch 7 taken 105984 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 99646 times.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 26237 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✓ Branch 17 taken 7322327 times.
✓ Branch 18 taken 372 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 357 times.
✓ Branch 21 taken 7322150 times.
✓ Branch 22 taken 357 times.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 138 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✓ Branch 29 taken 1010 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 89615 times.
✓ Branch 33 taken 60666 times.
✓ Branch 34 taken 89615 times.
✗ Branch 35 not taken.
✓ Branch 36 taken 89612 times.
✓ Branch 37 taken 65421 times.
✓ Branch 38 taken 89612 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 4 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 81 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 79 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✓ Branch 61 taken 54 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✓ Branch 65 taken 54 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
82255325 if (!wpage) return next_implicit(wc);
1364
1365 /* Calculate pointer to wc's weight, using page and offset */
1366 21264753 wbeg = wpage + code * uca->lengths[page];
1367 21264753 wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS;
1368
23/36
✓ Branch 0 taken 170235 times.
✓ Branch 1 taken 4993560 times.
✓ Branch 2 taken 35511 times.
✓ Branch 3 taken 1167580 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 99646 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26237 times.
✓ Branch 8 taken 362 times.
✓ Branch 9 taken 7321965 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7323166 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 136 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 1330 times.
✓ Branch 17 taken 59336 times.
✓ Branch 18 taken 1365 times.
✓ Branch 19 taken 64056 times.
✓ Branch 20 taken 1 times.
✓ Branch 21 taken 3 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
21264753 } while (!wbeg[0]); /* Skip ignorable code points */
1369
1370 21056961 return *wbeg++;
1371 }
1372
1373 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1374 79843923616 inline int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::more_weight() {
1375 /*
1376 Check if the weights for the previous code point have been
1377 already fully scanned. If no, return the first non-zero
1378 weight.
1379 */
1380
1381
4/4
✓ Branch 0 taken 418595692 times.
✓ Branch 1 taken 39712863105 times.
✓ Branch 2 taken 209496989 times.
✓ Branch 3 taken 209098703 times.
80262917594 while (num_of_ce_left != 0 && *wbeg == 0) {
1382 418993978 wbeg += wbeg_stride;
1383 418993978 --num_of_ce_left;
1384 }
1385
2/2
✓ Branch 0 taken 209098703 times.
✓ Branch 1 taken 39712863105 times.
79843923616 if (num_of_ce_left != 0) {
1386 418197406 uint16 rtn = *wbeg;
1387 418197406 wbeg += wbeg_stride;
1388 418197406 --num_of_ce_left;
1389 418197406 return rtn; /* return the next weight from expansion */
1390 }
1391 79425726210 return -1;
1392 }
1393
1394 3341369 static inline bool is_hiragana_char(my_wc_t wc) {
1395
4/4
✓ Branch 0 taken 3301899 times.
✓ Branch 1 taken 39470 times.
✓ Branch 2 taken 2449 times.
✓ Branch 3 taken 3299450 times.
3341369 return wc >= 0x3041 && wc <= 0x3096;
1396 }
1397
1398 3343491 static inline bool is_katakana_char(my_wc_t wc) {
1399
6/6
✓ Branch 0 taken 3301534 times.
✓ Branch 1 taken 41957 times.
✓ Branch 2 taken 3299777 times.
✓ Branch 3 taken 1757 times.
✓ Branch 4 taken 3146358 times.
✓ Branch 5 taken 195376 times.
6489849 return (wc >= 0x30A1 && wc <= 0x30FA) || // Full width katakana
1400
2/2
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 3146022 times.
6489849 (wc >= 0xFF66 && wc <= 0xFF9D); // Half width katakana
1401 }
1402
1403 3341370 static inline bool is_katakana_iteration(my_wc_t wc) {
1404
4/4
✓ Branch 0 taken 3341362 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3341356 times.
3341370 return wc == 0x30FD || wc == 0x30FE;
1405 }
1406
1407 3338920 static inline bool is_hiragana_iteration(my_wc_t wc) {
1408
4/4
✓ Branch 0 taken 3338912 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3338906 times.
3338920 return wc == 0x309D || wc == 0x309E;
1409 }
1410
1411 3340713 static inline bool is_ja_length_mark(my_wc_t wc) { return wc == 0x30FC; }
1412
1413 /**
1414 Return quaternary weight when running for that level.
1415
1416 @retval 0 - Do not return quaternary weight.
1417 @retval others - Quaternary weight for this character.
1418 */
1419 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1420 ALWAYS_INLINE int
1421 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_contraction_quat_wt() {
1422 /*
1423 For Japanese, only weight shift rule and previous context rule is
1424 defined. And in previous context rules, the first character is always
1425 katakana / hiragana, and the second character is always iteration or
1426 length mark. The quaternary weight of iteration / length mark is
1427 same as the first character. So has_quaternary_weight is always true.
1428 For how we return quaternary weight, please refer to the comment in
1429 handle_ja_common_quat_wt().
1430 */
1431
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
✓ Branch 5 taken 1389 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
1783 if (weight_lv == 3) {
1432 262 wbeg = nochar;
1433 262 num_of_ce_left = 0;
1434
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 214 times.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 6 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✓ Branch 23 taken 6 times.
✓ Branch 24 taken 6 times.
✓ Branch 25 taken 6 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
262 if (is_katakana_char(prev_char)) {
1435 234 return JA_KATA_QUAT_WEIGHT;
1436
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 6 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 6 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
28 } else if (is_hiragana_char(prev_char)) {
1437 28 return JA_HIRA_QUAT_WEIGHT;
1438 }
1439 }
1440 1521 return 0;
1441 }
1442
1443 /**
1444 Check whether quaternary weight is needed for character with Japanese
1445 kana-sensitive collation. If it is, return quaternary weight when running
1446 for that level.
1447
1448 @retval 0 - Quaternary weight check is done.
1449 @retval -1 - There is no quaternary weight for this character.
1450 @retval others - Quaternary weight for this character.
1451 */
1452 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1453 ALWAYS_INLINE int
1454 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_common_quat_wt(
1455 my_wc_t wc) {
1456 /*
1457 For Japanese kana-sensitive collation, we detect whether quaternary
1458 weight is necessary when scanning for the first level of weight.
1459 If it is, the quaternary weight will be returned for katakana /
1460 hiragana later.
1461 */
1462
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3337783 times.
✓ Branch 5 taken 7338 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 740 times.
✓ Branch 13 taken 39 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 80 times.
✓ Branch 23 taken 54 times.
✓ Branch 24 taken 79 times.
✓ Branch 25 taken 54 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
3346167 if (weight_lv == 0 && !has_quaternary_weight) {
1463
14/56
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 3337288 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 3336717 times.
✓ Branch 11 taken 571 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 724 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 712 times.
✓ Branch 27 taken 12 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 59 times.
✗ Branch 45 not taken.
✓ Branch 46 taken 31 times.
✓ Branch 47 taken 28 times.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 1 times.
✓ Branch 50 taken 31 times.
✓ Branch 51 taken 25 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
10014943 if (is_katakana_char(wc) || is_katakana_iteration(wc) ||
1464
30/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3337294 times.
✓ Branch 17 taken 489 times.
✓ Branch 18 taken 3336711 times.
✓ Branch 19 taken 6 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 3336706 times.
✓ Branch 22 taken 1077 times.
✓ Branch 23 taken 3336706 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 724 times.
✓ Branch 49 taken 16 times.
✓ Branch 50 taken 712 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 708 times.
✓ Branch 54 taken 32 times.
✓ Branch 55 taken 708 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 59 times.
✓ Branch 89 taken 21 times.
✓ Branch 90 taken 30 times.
✓ Branch 91 taken 1 times.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 27 times.
✓ Branch 94 taken 53 times.
✓ Branch 95 taken 27 times.
✓ Branch 96 taken 57 times.
✓ Branch 97 taken 22 times.
✓ Branch 98 taken 31 times.
✗ Branch 99 not taken.
✓ Branch 100 taken 8 times.
✓ Branch 101 taken 23 times.
✓ Branch 102 taken 56 times.
✓ Branch 103 taken 23 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
13352427 is_hiragana_char(wc) || is_hiragana_iteration(wc) ||
1465 3337484 is_ja_length_mark(wc))
1466 1218 has_quaternary_weight = true;
1467
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 4284 times.
✓ Branch 5 taken 6691499 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 83 times.
✓ Branch 13 taken 1597 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 90 times.
✓ Branch 23 taken 279 times.
✓ Branch 24 taken 90 times.
✓ Branch 25 taken 279 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
6698201 } else if (weight_lv == 3) {
1468 4547 wbeg = nochar;
1469 4547 num_of_ce_left = 0;
1470
30/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3081 times.
✓ Branch 17 taken 1203 times.
✓ Branch 18 taken 3075 times.
✓ Branch 19 taken 6 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 3070 times.
✓ Branch 22 taken 1214 times.
✓ Branch 23 taken 3070 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 40 times.
✓ Branch 50 taken 43 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 39 times.
✓ Branch 54 taken 44 times.
✓ Branch 55 taken 39 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 33 times.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 54 times.
✓ Branch 94 taken 36 times.
✓ Branch 95 taken 54 times.
✓ Branch 96 taken 55 times.
✓ Branch 97 taken 35 times.
✓ Branch 98 taken 54 times.
✓ Branch 99 taken 1 times.
✓ Branch 100 taken 3 times.
✓ Branch 101 taken 51 times.
✓ Branch 102 taken 39 times.
✓ Branch 103 taken 51 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
7776 if (is_katakana_char(wc) || is_katakana_iteration(wc) ||
1471 3229 is_ja_length_mark(wc)) {
1472 1333 return JA_KATA_QUAT_WEIGHT;
1473
22/84
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 1398 times.
✓ Branch 13 taken 1672 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 1392 times.
✓ Branch 16 taken 1678 times.
✓ Branch 17 taken 1392 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 12 times.
✓ Branch 37 taken 27 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 12 times.
✓ Branch 40 taken 27 times.
✓ Branch 41 taken 12 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 10 times.
✓ Branch 67 taken 44 times.
✓ Branch 68 taken 1 times.
✓ Branch 69 taken 9 times.
✓ Branch 70 taken 45 times.
✓ Branch 71 taken 9 times.
✓ Branch 72 taken 9 times.
✓ Branch 73 taken 42 times.
✗ Branch 74 not taken.
✓ Branch 75 taken 9 times.
✓ Branch 76 taken 42 times.
✓ Branch 77 taken 9 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
3214 } else if (is_hiragana_char(wc) || is_hiragana_iteration(wc)) {
1474 1792 return JA_HIRA_QUAT_WEIGHT;
1475 }
1476 1422 return -1;
1477 }
1478 10032336 return 0;
1479 }
1480
1481 // Generic version that can handle any number of levels.
1482 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1483 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_raw() {
1484 20171736869 int remain_weight = more_weight();
1485
36/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3577324 times.
✓ Branch 17 taken 20053073 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 103283319 times.
✓ Branch 21 taken 600846594 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 20040882 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 13362597 times.
✓ Branch 28 taken 92111265 times.
✓ Branch 29 taken 186919638 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 94401968 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 2610 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 108 times.
✓ Branch 53 taken 61983 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 459 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 56 times.
✓ Branch 60 taken 45 times.
✓ Branch 61 taken 19281 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 375695071 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 67 times.
✓ Branch 89 taken 659 times.
✓ Branch 90 taken 67 times.
✓ Branch 91 taken 659 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 297 times.
✓ Branch 95 taken 8755 times.
✓ Branch 96 taken 279 times.
✓ Branch 97 taken 8773 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 96 times.
✓ Branch 100 taken 93 times.
✓ Branch 101 taken 16113 times.
✓ Branch 102 taken 93 times.
✓ Branch 103 taken 16113 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 8214 times.
✓ Branch 107 taken 19130533695 times.
✓ Branch 108 taken 8258 times.
✓ Branch 109 taken 19177966079 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 4116 times.
39818948755 if (remain_weight >= 0) return remain_weight;
1486
1487 do {
1488 39712045605 my_wc_t wc = 0;
1489
1490 /* Get next code point */
1491
44/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10009965 times.
✓ Branch 17 taken 10053611 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 300260109 times.
✓ Branch 21 taken 300859818 times.
✓ Branch 22 taken 10013990 times.
✓ Branch 23 taken 10036096 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6677229 times.
✓ Branch 27 taken 6692036 times.
✓ Branch 28 taken 93427849 times.
✓ Branch 29 taken 93606617 times.
✓ Branch 30 taken 39165543 times.
✓ Branch 31 taken 55241660 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 146 times.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 270 times.
✓ Branch 53 taken 61713 times.
✓ Branch 54 taken 153 times.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 28 times.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 84 times.
✓ Branch 61 taken 19197 times.
✓ Branch 62 taken 151912324 times.
✓ Branch 63 taken 223783187 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2119 times.
✓ Branch 95 taken 6642 times.
✓ Branch 96 taken 2172 times.
✓ Branch 97 taken 6601 times.
✓ Branch 98 taken 39 times.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1816 times.
✓ Branch 101 taken 14298 times.
✓ Branch 102 taken 1825 times.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 368824991 times.
✓ Branch 107 taken 18795301959 times.
✓ Branch 108 taken 367868608 times.
✓ Branch 109 taken 18868170303 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
39712045605 int mblen = mb_wc(&wc, sbeg, send);
1492
44/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10028397 times.
✓ Branch 17 taken 10035179 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 300813069 times.
✓ Branch 21 taken 300306858 times.
✓ Branch 22 taken 10032422 times.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6689517 times.
✓ Branch 27 taken 6679748 times.
✓ Branch 28 taken 93599881 times.
✓ Branch 29 taken 93434585 times.
✓ Branch 30 taken 39171687 times.
✓ Branch 31 taken 55235516 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 146 times.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 270 times.
✓ Branch 53 taken 61713 times.
✓ Branch 54 taken 153 times.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 28 times.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 84 times.
✓ Branch 61 taken 19197 times.
✓ Branch 62 taken 151912310 times.
✓ Branch 63 taken 223783201 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2119 times.
✓ Branch 95 taken 6642 times.
✓ Branch 96 taken 2172 times.
✓ Branch 97 taken 6601 times.
✓ Branch 98 taken 39 times.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1816 times.
✓ Branch 101 taken 14298 times.
✓ Branch 102 taken 1825 times.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 368811613 times.
✓ Branch 107 taken 18795315337 times.
✓ Branch 108 taken 367858504 times.
✓ Branch 109 taken 18868180407 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
39712045605 if (mblen <= 0) {
1493 if (LEVELS_FOR_COMPARE == 1) {
1494 1021355678 ++weight_lv;
1495 1757075983 return -1;
1496 }
1497
1498
30/84
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 10027641 times.
✓ Branch 13 taken 756 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 200542232 times.
✓ Branch 17 taken 100270837 times.
✓ Branch 18 taken 6688324 times.
✓ Branch 19 taken 3344098 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 3344797 times.
✓ Branch 23 taken 3344720 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 114 times.
✓ Branch 37 taken 32 times.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 180 times.
✓ Branch 41 taken 90 times.
✓ Branch 42 taken 102 times.
✓ Branch 43 taken 51 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✓ Branch 46 taken 14 times.
✓ Branch 47 taken 14 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 131 times.
✓ Branch 67 taken 31 times.
✓ Branch 68 taken 132 times.
✓ Branch 69 taken 31 times.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✓ Branch 72 taken 1866 times.
✓ Branch 73 taken 253 times.
✓ Branch 74 taken 1914 times.
✓ Branch 75 taken 258 times.
✓ Branch 76 taken 30 times.
✓ Branch 77 taken 9 times.
✓ Branch 78 taken 931 times.
✓ Branch 79 taken 885 times.
✓ Branch 80 taken 940 times.
✓ Branch 81 taken 885 times.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
327572298 if (++weight_lv < LEVELS_FOR_COMPARE) {
1499
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10027641 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 114 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 131 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 132 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
10028018 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1500 // Return directly if we don't have quaternary weight.
1501
16/56
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 3342459 times.
✓ Branch 9 taken 6685182 times.
✓ Branch 10 taken 3341646 times.
✓ Branch 11 taken 813 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 38 times.
✓ Branch 25 taken 76 times.
✓ Branch 26 taken 6 times.
✓ Branch 27 taken 32 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 43 times.
✓ Branch 45 taken 88 times.
✓ Branch 46 taken 3 times.
✓ Branch 47 taken 40 times.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 89 times.
✓ Branch 50 taken 3 times.
✓ Branch 51 taken 40 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
10028018 if (weight_lv == 3 && !has_quaternary_weight) return -1;
1502 }
1503 /*
1504 Restart scanning from the beginning of the string, and add
1505 a level separator.
1506 */
1507 217267690 sbeg = sbeg_dup;
1508 217267690 return 0;
1509 }
1510
1511 // If we don't have any more levels left, we're done.
1512 106962950 return -1;
1513 }
1514
1515 38363117629 sbeg += mblen;
1516
22/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 10035179 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✓ Branch 21 taken 300306858 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6679748 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 93434585 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 55235516 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61713 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19197 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 223783201 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 506 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 6642 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6601 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14298 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 18795315337 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 18868180407 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
38363117629 assert(wc <= uca->maxchar); // mb_wc() has already checked this.
1517
1518
31/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10035179 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 130159215 times.
✓ Branch 21 taken 170147643 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6679748 times.
✓ Branch 28 taken 36705833 times.
✓ Branch 29 taken 56728752 times.
✓ Branch 30 taken 604 times.
✓ Branch 31 taken 55235078 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2476 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 26676 times.
✓ Branch 53 taken 35037 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 7518 times.
✓ Branch 61 taken 11679 times.
✓ Branch 62 taken 18 times.
✓ Branch 63 taken 223782848 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 506 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 505 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 121 times.
✓ Branch 95 taken 6521 times.
✓ Branch 96 taken 121 times.
✓ Branch 97 taken 6480 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14298 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 51445150 times.
✓ Branch 107 taken 18750560320 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 18815643420 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
38363117629 if (my_uca_have_contractions(uca)) {
1519 const uint16 *cweight;
1520 /*
1521 If we have scanned a code point which can have previous context,
1522 and there were some more code points already before,
1523 then verify that {prev_char, wc} together form
1524 a real previous context pair.
1525 Note, we support only 2-character long sequences with previous
1526 context at the moment. CLDR does not have longer sequences.
1527 CLDR doesn't have previous context rule whose first character is
1528 0x0000, so the initial value (0) of prev_char won't break the logic.
1529 */
1530
12/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✓ Branch 17 taken 12263 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✓ Branch 21 taken 12240 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 16 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 16 times.
✓ Branch 90 taken 57 times.
✓ Branch 91 taken 21 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 3 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 3 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
213602186 if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) &&
1531 26984 my_uca_can_be_previous_context_head(uca->contraction_flags,
1532
36/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 13876 times.
✓ Branch 33 taken 10021303 times.
✓ Branch 34 taken 1613 times.
✓ Branch 35 taken 10033566 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 12879 times.
✓ Branch 41 taken 130146336 times.
✓ Branch 42 taken 639 times.
✓ Branch 43 taken 130158576 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 36705833 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 36705833 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 72 times.
✓ Branch 97 taken 2404 times.
✓ Branch 98 taken 56 times.
✓ Branch 99 taken 2420 times.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✓ Branch 105 taken 26676 times.
✗ Branch 106 not taken.
✓ Branch 107 taken 26676 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✓ Branch 121 taken 7518 times.
✗ Branch 122 not taken.
✓ Branch 123 taken 7518 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 73 times.
✓ Branch 177 taken 433 times.
✓ Branch 178 taken 57 times.
✓ Branch 179 taken 449 times.
✓ Branch 180 taken 78 times.
✓ Branch 181 taken 427 times.
✓ Branch 182 taken 57 times.
✓ Branch 183 taken 448 times.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 3 times.
✓ Branch 189 taken 118 times.
✗ Branch 190 not taken.
✓ Branch 191 taken 121 times.
✓ Branch 192 taken 3 times.
✓ Branch 193 taken 118 times.
✗ Branch 194 not taken.
✓ Branch 195 taken 121 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✓ Branch 213 taken 5140 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 5140 times.
✗ Branch 216 not taken.
✓ Branch 217 taken 5231 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 5231 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
176975505 prev_char) &&
1533
10/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 1613 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 1613 times.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 639 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 639 times.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 56 times.
✗ Branch 97 not taken.
✓ Branch 98 taken 56 times.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 57 times.
✗ Branch 177 not taken.
✓ Branch 178 taken 57 times.
✗ Branch 179 not taken.
✓ Branch 180 taken 57 times.
✗ Branch 181 not taken.
✓ Branch 182 taken 57 times.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
4844 (cweight = previous_context_find(prev_char, wc))) {
1534 // For Japanese kana-sensitive collation.
1535
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1613 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 56 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 57 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
1783 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1536 1783 int quat_wt = handle_ja_contraction_quat_wt();
1537 1783 prev_char = 0;
1538
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
✓ Branch 5 taken 1389 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
1783 if (quat_wt > 0) return quat_wt;
1539 }
1540 2160 prev_char = 0; /* Clear for the next code point */
1541 2160 return *cweight;
1542
18/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 10033566 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 145311 times.
✓ Branch 21 taken 130013265 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 40577 times.
✓ Branch 29 taken 36665256 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2420 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 2421 times.
✓ Branch 53 taken 24255 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 717 times.
✓ Branch 61 taken 6801 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 449 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 448 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 121 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 121 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 354 times.
✓ Branch 107 taken 4786 times.
✓ Branch 108 taken 312 times.
✓ Branch 109 taken 4919 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
176946099 } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) {
1543 /* Check if wc starts a contraction */
1544 size_t chars_skipped; // Ignored.
1545
18/224
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 145311 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 402 times.
✓ Branch 43 taken 144909 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✓ Branch 56 taken 40577 times.
✗ Branch 57 not taken.
✓ Branch 58 taken 139 times.
✓ Branch 59 taken 40438 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 2421 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 108 times.
✓ Branch 107 taken 2313 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✓ Branch 120 taken 717 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 36 times.
✓ Branch 123 taken 681 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 354 times.
✗ Branch 213 not taken.
✓ Branch 214 taken 104 times.
✓ Branch 215 taken 250 times.
✓ Branch 216 taken 312 times.
✗ Branch 217 not taken.
✓ Branch 218 taken 101 times.
✓ Branch 219 taken 211 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
189692 if ((cweight = contraction_find(wc, &chars_skipped))) return *cweight;
1546 }
1547 176945209 prev_char = wc;
1548 }
1549
1550 // For Japanese kana-sensitive collation.
1551
4/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10033566 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 2420 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 449 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 448 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
10036883 if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) {
1552
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3345121 times.
✓ Branch 5 taken 6688445 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 779 times.
✓ Branch 13 taken 1641 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 134 times.
✓ Branch 23 taken 315 times.
✓ Branch 24 taken 133 times.
✓ Branch 25 taken 315 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
10036883 int quat_wt = handle_ja_common_quat_wt(wc);
1553
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1392 times.
✓ Branch 5 taken 10032174 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 2408 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 9 times.
✓ Branch 23 taken 440 times.
✓ Branch 24 taken 9 times.
✓ Branch 25 taken 439 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
10036883 if (quat_wt == -1)
1554 1422 continue;
1555
8/28
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 2892 times.
✓ Branch 5 taken 10029282 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 71 times.
✓ Branch 13 taken 2337 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 81 times.
✓ Branch 23 taken 359 times.
✓ Branch 24 taken 81 times.
✓ Branch 25 taken 358 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
10035461 else if (quat_wt)
1556 3125 return quat_wt;
1557 }
1558 /* Process single code point */
1559 38265827346 uint page = wc >> 8;
1560 38265827346 uint code = wc & 0xFF;
1561
1562 /* If weight page for wc does not exist, then calculate algorithmically */
1563 38265827346 const uint16 *wpage = uca->weights[page];
1564
35/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 9476838 times.
✓ Branch 17 taken 552444 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 289097553 times.
✓ Branch 21 taken 11208264 times.
✓ Branch 22 taken 9665721 times.
✓ Branch 23 taken 351943 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6444000 times.
✓ Branch 27 taken 235748 times.
✓ Branch 28 taken 90209280 times.
✓ Branch 29 taken 3225166 times.
✓ Branch 30 taken 3228772 times.
✓ Branch 31 taken 52006306 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2337 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61605 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19161 times.
✓ Branch 62 taken 26 times.
✓ Branch 63 taken 223782822 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 359 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 142 times.
✓ Branch 95 taken 6500 times.
✓ Branch 96 taken 119 times.
✓ Branch 97 taken 6482 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 138 times.
✓ Branch 101 taken 14160 times.
✓ Branch 102 taken 117 times.
✓ Branch 103 taken 14171 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 8035 times.
✓ Branch 107 taken 18750557321 times.
✓ Branch 108 taken 10829 times.
✓ Branch 109 taken 18815637721 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
38673968916 if (!wpage) return next_implicit(wc);
1565
1566 /* Calculate pointer to wc's weight, using page and offset */
1567 37857685776 wbeg = UCA900_WEIGHT_ADDR(wpage, weight_lv, code);
1568 37857685776 wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS;
1569 555498 num_of_ce_left = UCA900_NUM_OF_CE(wpage, code);
1570
36/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10503 times.
✓ Branch 17 taken 543333 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 273333 times.
✓ Branch 21 taken 10934931 times.
✓ Branch 22 taken 9204 times.
✓ Branch 23 taken 342739 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6668 times.
✓ Branch 27 taken 229080 times.
✓ Branch 28 taken 114828 times.
✓ Branch 29 taken 3110338 times.
✓ Branch 30 taken 5235 times.
✓ Branch 31 taken 52001071 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 12 times.
✓ Branch 49 taken 2337 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61605 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19161 times.
✓ Branch 62 taken 440 times.
✓ Branch 63 taken 223782382 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 9 times.
✓ Branch 89 taken 359 times.
✓ Branch 90 taken 9 times.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 6 times.
✓ Branch 95 taken 6494 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6482 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1 times.
✓ Branch 101 taken 14159 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14171 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 33593255 times.
✓ Branch 107 taken 18716964066 times.
✓ Branch 108 taken 58072832 times.
✓ Branch 109 taken 18757564889 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
37857687198 } while (!wbeg[0]); /* Skip ignorable code points */
1571
1572 37765600863 uint16 rtn = *wbeg;
1573 37765600863 wbeg += wbeg_stride;
1574 37765600863 --num_of_ce_left;
1575 37765600863 return rtn;
1576 }
1577
1578 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1579 template <class T, class U>
1580 ALWAYS_INLINE void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::for_each_weight(
1581 T func, U preaccept_data) {
1582
13/64
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 3344224 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 3344224 times.
✓ Branch 24 taken 3344870 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 3344870 times.
✓ Branch 28 taken 43555055 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 43555064 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 51 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✓ Branch 55 taken 51 times.
✓ Branch 56 taken 14 times.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 14 times.
✓ Branch 60 taken 151912063 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 105 times.
✓ Branch 63 taken 151911958 times.
202156217 if (cs->tailoring || cs->mbminlen != 1 || cs->coll_param) {
1583 // Slower, generic path.
1584 int s_res;
1585
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 20287995 times.
✓ Branch 9 taken 3342402 times.
✓ Branch 10 taken 603859076 times.
✓ Branch 11 taken 100270837 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✓ Branch 14 taken 185431022 times.
✓ Branch 15 taken 93599881 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2628 times.
✓ Branch 25 taken 38 times.
✓ Branch 26 taken 62001 times.
✓ Branch 27 taken 90 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 19242 times.
✓ Branch 31 taken 84 times.
1204089331 while ((s_res = next()) >= 0) {
1586
11/48
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 20287995 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 333 times.
✓ Branch 19 taken 20287662 times.
✓ Branch 20 taken 603859076 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 498 times.
✓ Branch 23 taken 603858578 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 185431022 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 185431022 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 2628 times.
✗ Branch 42 not taken.
✓ Branch 43 taken 62001 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✓ Branch 47 taken 19242 times.
809661964 if (!func(s_res, s_res == 0)) return;
1587 }
1588 197213332 return;
1589 }
1590
1591 /*
1592 Fast path. TODO: See if we can accept some character sets
1593 with tailorings.
1594 */
1595 202156181 const uint16 *ascii_wpage =
1596 202156181 UCA900_WEIGHT_ADDR(uca->weights[0], /*level=*/weight_lv, /*subcode=*/0);
1597
1598 /*
1599 Precalculate the limit for the fast path below, taking care not to form
1600 pointers that are before sbeg, as those cannot be legally compared.
1601 (In particular, this catches the case of sbeg == send == nullptr.)
1602 */
1603
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 3145988 times.
✓ Branch 11 taken 198236 times.
✓ Branch 12 taken 3146566 times.
✓ Branch 13 taken 198304 times.
✓ Branch 14 taken 34434167 times.
✓ Branch 15 taken 9120897 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 28 times.
✓ Branch 27 taken 23 times.
✓ Branch 28 taken 6 times.
✓ Branch 29 taken 8 times.
✓ Branch 30 taken 124853839 times.
✓ Branch 31 taken 27058119 times.
202156181 const uchar *send_local = (send - sbeg > 3) ? (send - 3) : sbeg;
1604
1605 301346658 for (;;) {
1606 /*
1607 We could have more weights left from the previous call to next()
1608 (if any) that we need to deal with.
1609 */
1610 int s_res;
1611
11/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 3444795 times.
✓ Branch 11 taken 20040882 times.
✓ Branch 12 taken 3367697 times.
✓ Branch 13 taken 13362597 times.
✓ Branch 14 taken 3296700 times.
✓ Branch 15 taken 94403414 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 4 times.
✓ Branch 27 taken 459 times.
✓ Branch 28 taken 2 times.
✓ Branch 29 taken 56 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 375695147 times.
513612024 while ((s_res = more_weight()) >= 0) {
1612
11/48
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 3444795 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 3444795 times.
✓ Branch 24 taken 3367697 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 24 times.
✓ Branch 27 taken 3367673 times.
✓ Branch 28 taken 3296694 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 9 times.
✓ Branch 31 taken 3296685 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 4 times.
✗ Branch 44 not taken.
✓ Branch 45 taken 2 times.
✗ Branch 46 not taken.
✓ Branch 47 taken 26 times.
10109069 if (!func(s_res, s_res == 0)) return;
1613 }
1614
1615 /*
1616 Loop in a simple fast path as long as we only have non-ignorable
1617 ASCII characters. These characters always have exactly a single weight
1618 and consist of only a single byte, so we can skip a lot of the checks
1619 we'd otherwise have to do.
1620 */
1621 503502555 const uchar *sbeg_local = sbeg;
1622
33/96
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 9441582 times.
✓ Branch 31 taken 10601019 times.
✓ Branch 32 taken 9441349 times.
✓ Branch 33 taken 233 times.
✓ Branch 34 taken 9441349 times.
✓ Branch 35 taken 10601252 times.
✓ Branch 36 taken 6296078 times.
✓ Branch 37 taken 7068789 times.
✓ Branch 38 taken 6295751 times.
✓ Branch 39 taken 327 times.
✓ Branch 40 taken 6295751 times.
✓ Branch 41 taken 7069116 times.
✓ Branch 42 taken 297507442 times.
✓ Branch 43 taken 88162919 times.
✓ Branch 44 taken 294430502 times.
✓ Branch 45 taken 3076873 times.
✓ Branch 46 taken 294430502 times.
✓ Branch 47 taken 91239792 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✓ Branch 78 taken 606 times.
✓ Branch 79 taken 402 times.
✓ Branch 80 taken 606 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 606 times.
✓ Branch 83 taken 402 times.
✓ Branch 84 taken 348 times.
✓ Branch 85 taken 50 times.
✓ Branch 86 taken 348 times.
✗ Branch 87 not taken.
✓ Branch 88 taken 348 times.
✓ Branch 89 taken 50 times.
✓ Branch 90 taken 473259778 times.
✓ Branch 91 taken 375694112 times.
✓ Branch 92 taken 473260155 times.
✗ Branch 93 not taken.
✓ Branch 94 taken 473260265 times.
✓ Branch 95 taken 375693910 times.
1268033125 while (sbeg_local < send_local && preaccept_data(sizeof(uint32))) {
1623 /*
1624 Check if all four bytes are in the range 0x20..0x7e, inclusive.
1625 These have exactly one weight. Note that this unfortunately does not
1626 include tab and newline, which would otherwise be legal candidates.
1627
1628 See the FastOutOfRange unit test for verification that the bitfiddling
1629 trick used here is correct.
1630 */
1631 uint32 four_bytes;
1632 783428821 memcpy(&four_bytes, sbeg_local, sizeof(four_bytes));
1633
12/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✓ Branch 11 taken 9439630 times.
✓ Branch 12 taken 2270 times.
✓ Branch 13 taken 6293481 times.
✓ Branch 14 taken 291274850 times.
✓ Branch 15 taken 3155652 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 549 times.
✓ Branch 27 taken 57 times.
✓ Branch 28 taken 342 times.
✓ Branch 29 taken 6 times.
✓ Branch 30 taken 473259061 times.
✓ Branch 31 taken 1204 times.
783428821 if (((four_bytes + 0x01010101u) & 0x80808080) ||
1634
7/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✓ Branch 15 taken 7903 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 549 times.
✗ Branch 27 not taken.
✓ Branch 28 taken 342 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 473259130 times.
✗ Branch 31 not taken.
764538791 ((four_bytes - 0x20202020u) & 0x80808080))
1635 break;
1636 764530957 const int s_res0 = ascii_wpage[sbeg_local[0]];
1637 764530957 const int s_res1 = ascii_wpage[sbeg_local[1]];
1638 764530957 const int s_res2 = ascii_wpage[sbeg_local[2]];
1639 764530957 const int s_res3 = ascii_wpage[sbeg_local[3]];
1640
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
764530957 assert(s_res0 != 0);
1641
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
764530957 assert(s_res1 != 0);
1642
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
764530957 assert(s_res2 != 0);
1643
6/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
764530957 assert(s_res3 != 0);
1644
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
764530957 func(s_res0, /*is_level_separator=*/false);
1645
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
764529713 func(s_res1, /*is_level_separator=*/false);
1646
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
764529965 func(s_res2, /*is_level_separator=*/false);
1647
3/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
764530246 func(s_res3, /*is_level_separator=*/false);
1648 764530570 sbeg_local += sizeof(uint32);
1649 }
1650 503502386 sbeg = sbeg_local;
1651
1652 // Do a single code point in the generic path.
1653 503500242 s_res = next_raw();
1654
10/32
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 6688324 times.
✓ Branch 11 taken 13352558 times.
✓ Branch 12 taken 3344797 times.
✓ Branch 13 taken 10017800 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 94401530 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 102 times.
✓ Branch 27 taken 357 times.
✓ Branch 28 taken 14 times.
✓ Branch 29 taken 42 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 375694718 times.
503500242 if (s_res == 0) {
1655 // Level separator, so we have to update our page pointer.
1656 10033237 ascii_wpage += UCA900_DISTANCE_BETWEEN_LEVELS;
1657 }
1658
37/112
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 16696784 times.
✓ Branch 41 taken 3344098 times.
✓ Branch 42 taken 16696784 times.
✗ Branch 43 not taken.
✓ Branch 44 taken 126 times.
✓ Branch 45 taken 16696658 times.
✓ Branch 46 taken 3344224 times.
✓ Branch 47 taken 16696658 times.
✓ Branch 48 taken 10017877 times.
✓ Branch 49 taken 3344720 times.
✓ Branch 50 taken 10017877 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 126 times.
✓ Branch 53 taken 10017751 times.
✓ Branch 54 taken 3344846 times.
✓ Branch 55 taken 10017751 times.
✓ Branch 56 taken 55231120 times.
✓ Branch 57 taken 39170410 times.
✓ Branch 58 taken 55230804 times.
✗ Branch 59 not taken.
✓ Branch 60 taken 4381979 times.
✓ Branch 61 taken 50848825 times.
✓ Branch 62 taken 43552402 times.
✓ Branch 63 taken 50848812 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 408 times.
✓ Branch 95 taken 51 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 408 times.
✓ Branch 98 taken 51 times.
✓ Branch 99 taken 408 times.
✓ Branch 100 taken 42 times.
✓ Branch 101 taken 14 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 42 times.
✓ Branch 104 taken 14 times.
✓ Branch 105 taken 42 times.
✓ Branch 106 taken 223782760 times.
✓ Branch 107 taken 151911958 times.
✓ Branch 108 taken 320 times.
✓ Branch 109 taken 223782703 times.
✓ Branch 110 taken 151911994 times.
✓ Branch 111 taken 223782987 times.
503500242 if (s_res < 0 || !func(s_res, s_res == 0)) return;
1659 }
1660 }
1661
1662 /**
1663 Change a weight according to the reorder parameters.
1664 @param weight The weight to change
1665 @retval reordered weight
1666 */
1667 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1668 199005436 uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_reorder_param(
1669 uint16 weight) {
1670 /*
1671 Chinese collation's reordering is done in next_implicit() and
1672 modify_all_zh_pages(). See the comment on zh_reorder_param and
1673 change_zh_implicit().
1674 */
1675
2/2
✓ Branch 0 taken 6502021 times.
✓ Branch 1 taken 93000697 times.
199005436 if (cs->coll_param == &zh_coll_param) return weight;
1676 186001394 const Reorder_param *param = cs->coll_param->reorder_param;
1677
4/4
✓ Branch 0 taken 92630596 times.
✓ Branch 1 taken 370101 times.
✓ Branch 2 taken 787196 times.
✓ Branch 3 taken 91843400 times.
186001394 if (weight >= START_WEIGHT_TO_REORDER && weight <= param->max_weight) {
1678
1/2
✓ Branch 0 taken 2182167 times.
✗ Branch 1 not taken.
4364334 for (int rec_ind = 0; rec_ind < param->wt_rec_num; ++rec_ind) {
1679 4364334 const Reorder_wt_rec *wt_rec = param->wt_rec + rec_ind;
1680
2/2
✓ Branch 0 taken 1569824 times.
✓ Branch 1 taken 612343 times.
4364334 if (weight >= wt_rec->old_wt_bdy.begin &&
1681
2/2
✓ Branch 0 taken 787196 times.
✓ Branch 1 taken 782628 times.
3139648 weight <= wt_rec->old_wt_bdy.end) {
1682 /*
1683 As commented in adjust_japanese_weight(), if this is a Japanese
1684 collation, for characters whose weight is between Latin and Kana
1685 group, and for the characters whose weight is between Kana and
1686 Han, we need to change their weight to be after all Han
1687 characters. We decide to give them the weights [FB86 0000 0000]
1688 [origin weight] to make sure the new weights are greater than
1689 the maximum implicit weight of Han characters. If this character's
1690 origin weight has more than one non-ignorable primary weight, for
1691 example, [AAAA 0020 0002][BBBB 0020 0002], both AAAA and BBBB need
1692 to be changed. The new weight should be:
1693 [FB86 0000 0000][AAAA 0020 0002][FB86 0000 0000][BBBB 0020 0002].
1694 */
1695
4/4
✓ Branch 0 taken 635782 times.
✓ Branch 1 taken 151414 times.
✓ Branch 2 taken 604098 times.
✓ Branch 3 taken 31684 times.
1574392 if (param == &ja_reorder_param && wt_rec->new_wt_bdy.begin == 0) {
1696 1208196 return_origin_weight = !return_origin_weight;
1697
2/2
✓ Branch 0 taken 302028 times.
✓ Branch 1 taken 302070 times.
1208196 if (return_origin_weight) break;
1698
1699 /*
1700 We didn't consume the weight; rewind the iterator, so we will
1701 get another call where we can output it.
1702 */
1703 604140 wbeg -= wbeg_stride;
1704 604140 ++num_of_ce_left;
1705 604140 return 0xFB86;
1706 }
1707
1708 // Regular (non-Japanese-specific) reordering.
1709 366196 return weight - wt_rec->old_wt_bdy.begin + wt_rec->new_wt_bdy.begin;
1710 }
1711 }
1712 }
1713 185031058 return weight;
1714 }
1715
1716 // See Unicode TR35 section 3.14.1.
1717 3415893 static bool is_tertiary_weight_upper_case(uint16 weight) {
1718
10/10
✓ Branch 0 taken 17050 times.
✓ Branch 1 taken 3398843 times.
✓ Branch 2 taken 8787 times.
✓ Branch 3 taken 8263 times.
✓ Branch 4 taken 3407405 times.
✓ Branch 5 taken 225 times.
✓ Branch 6 taken 3407168 times.
✓ Branch 7 taken 237 times.
✓ Branch 8 taken 3406829 times.
✓ Branch 9 taken 339 times.
3415893 if ((weight >= 0x08 && weight <= 0x0C) || weight == 0x0E || weight == 0x11 ||
1719
2/2
✓ Branch 0 taken 684 times.
✓ Branch 1 taken 3406145 times.
3406829 weight == 0x12 || weight == 0x1D)
1720 9748 return true;
1721 3406145 return false;
1722 }
1723
1724 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1725 26889090 uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_case_first(
1726 uint16 weight) {
1727 /*
1728 We only apply case weight change here when the character is not tailored.
1729 Tailored character's case weight has been changed in
1730 my_char_weight_put_900().
1731 We have only 1 collation (Danish) needs to implement [caseFirst upper].
1732 */
1733
5/6
✓ Branch 0 taken 13444545 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3410862 times.
✓ Branch 3 taken 10033683 times.
✓ Branch 4 taken 3410555 times.
✓ Branch 5 taken 307 times.
26889090 if (cs->coll_param->case_first == CASE_FIRST_UPPER && weight_lv == 2 &&
1734 weight < 0x20) {
1735
2/2
✓ Branch 0 taken 7079 times.
✓ Branch 1 taken 3403476 times.
6821110 if (is_tertiary_weight_upper_case(weight))
1736 14158 weight |= CASE_FIRST_UPPER_MASK;
1737 else
1738 6806952 weight |= CASE_FIRST_LOWER_MASK;
1739 }
1740 26889090 return weight;
1741 }
1742
1743 template <class Mb_wc, int LEVELS_FOR_COMPARE>
1744 ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next() {
1745 39218166089 int res = next_raw();
1746 39218166089 Coll_param *param = cs->coll_param;
1747
56/160
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 13602000 times.
✓ Branch 17 taken 10028397 times.
✓ Branch 18 taken 13602000 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 403316844 times.
✓ Branch 21 taken 300813069 times.
✓ Branch 22 taken 161350512 times.
✓ Branch 23 taken 241966332 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 185431022 times.
✓ Branch 29 taken 93599881 times.
✓ Branch 30 taken 39732257 times.
✓ Branch 31 taken 145698765 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2520 times.
✓ Branch 49 taken 146 times.
✓ Branch 50 taken 2520 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 61821 times.
✓ Branch 53 taken 270 times.
✓ Branch 54 taken 24732 times.
✓ Branch 55 taken 37089 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 19242 times.
✓ Branch 61 taken 84 times.
✓ Branch 62 taken 4122 times.
✓ Branch 63 taken 15120 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✓ Branch 113 taken 162 times.
✓ Branch 114 taken 564 times.
✗ Branch 115 not taken.
✓ Branch 116 taken 563 times.
✓ Branch 117 taken 163 times.
✓ Branch 118 taken 563 times.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 6933 times.
✓ Branch 125 taken 2119 times.
✓ Branch 126 taken 472 times.
✓ Branch 127 taken 6461 times.
✓ Branch 128 taken 6880 times.
✓ Branch 129 taken 2172 times.
✓ Branch 130 taken 456 times.
✓ Branch 131 taken 6424 times.
✓ Branch 132 taken 57 times.
✓ Branch 133 taken 39 times.
✗ Branch 134 not taken.
✓ Branch 135 taken 57 times.
✓ Branch 136 taken 14390 times.
✓ Branch 137 taken 1816 times.
✗ Branch 138 not taken.
✓ Branch 139 taken 14390 times.
✓ Branch 140 taken 14381 times.
✓ Branch 141 taken 1825 times.
✗ Branch 142 not taken.
✓ Branch 143 taken 14381 times.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 18739260764 times.
✓ Branch 149 taken 346531268 times.
✓ Branch 150 taken 783 times.
✓ Branch 151 taken 18739259981 times.
✓ Branch 152 taken 18752629280 times.
✓ Branch 153 taken 372813301 times.
✓ Branch 154 taken 800 times.
✓ Branch 155 taken 18752628480 times.
✓ Branch 156 taken 2517 times.
✓ Branch 157 taken 1599 times.
✗ Branch 158 not taken.
✓ Branch 159 taken 2517 times.
39218166089 if (res > 0 && param) {
1748 /* Reorder weight change only on primary level. */
1749
36/160
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 13602000 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 6764423 times.
✓ Branch 19 taken 6837577 times.
✓ Branch 20 taken 107580689 times.
✓ Branch 21 taken 53769823 times.
✓ Branch 22 taken 52993719 times.
✓ Branch 23 taken 54586970 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 39732257 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 39732257 times.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2520 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 793 times.
✓ Branch 51 taken 1727 times.
✓ Branch 52 taken 16488 times.
✓ Branch 53 taken 8244 times.
✓ Branch 54 taken 5496 times.
✓ Branch 55 taken 10992 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 4122 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 4122 times.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 149 times.
✓ Branch 115 taken 415 times.
✓ Branch 116 taken 563 times.
✗ Branch 117 not taken.
✓ Branch 118 taken 148 times.
✓ Branch 119 taken 415 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 41 times.
✓ Branch 125 taken 431 times.
✓ Branch 126 taken 14 times.
✓ Branch 127 taken 27 times.
✓ Branch 128 taken 45 times.
✓ Branch 129 taken 411 times.
✓ Branch 130 taken 14 times.
✓ Branch 131 taken 31 times.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 783 times.
✗ Branch 149 not taken.
✓ Branch 150 taken 783 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 800 times.
✗ Branch 153 not taken.
✓ Branch 154 taken 800 times.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
214719781 if (param->reorder_param && weight_lv == 0) res = apply_reorder_param(res);
1750
16/80
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 13602000 times.
✓ Branch 10 taken 13442191 times.
✓ Branch 11 taken 147908321 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 39732257 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 2520 times.
✓ Branch 26 taken 2061 times.
✓ Branch 27 taken 22671 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 4122 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 564 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 563 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✓ Branch 62 taken 151 times.
✓ Branch 63 taken 321 times.
✓ Branch 64 taken 142 times.
✓ Branch 65 taken 314 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✓ Branch 75 taken 783 times.
✗ Branch 76 not taken.
✓ Branch 77 taken 800 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
214719781 if (param->case_first != CASE_FIRST_OFF) res = apply_case_first(res);
1751 }
1752 39218166089 return res;
1753 }
1754
1755 /*
1756 Compares two strings according to the collation
1757
1758 SYNOPSIS:
1759 my_strnncoll_uca()
1760 cs Character set information
1761 s First string
1762 slen First string length
1763 t Second string
1764 tlen Second string length
1765
1766 NOTES:
1767 Initializes two weight scanners and gets weights
1768 corresponding to two strings in a loop. If weights are not
1769 the same at some step then returns their difference.
1770
1771 In the while() comparison these situations are possible:
1772 1. (s_res>0) and (t_res>0) and (s_res == t_res)
1773 Weights are the same so far, continue comparison
1774 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
1775 A difference has been found, return.
1776 3. (s_res>0) and (t_res<0)
1777 We have reached the end of the second string, or found
1778 an illegal multibyte sequence in the second string.
1779 Return a positive number, i.e. the first string is bigger.
1780 4. (s_res<0) and (t_res>0)
1781 We have reached the end of the first string, or found
1782 an illegal multibyte sequence in the first string.
1783 Return a negative number, i.e. the second string is bigger.
1784 5. (s_res<0) and (t_res<0)
1785 Both scanners returned -1. It means we have reached
1786 the end-of-string or an illegal sequence in both strings
1787 at the same time. Return 0, strings are equal.
1788
1789 RETURN
1790 Difference between two strings, according to the collation:
1791 0 - means strings are equal
1792 negative number - means the first string is smaller
1793 positive number - means the first string is bigger
1794 */
1795
1796 template <class Scanner, int LEVELS_FOR_COMPARE, class Mb_wc>
1797 10423188002 static int my_strnncoll_uca(const CHARSET_INFO *cs, const Mb_wc mb_wc,
1798 const uchar *s, size_t slen, const uchar *t,
1799 size_t tlen, bool t_is_prefix) {
1800 10423188002 Scanner sscanner(mb_wc, cs, s, slen);
1801 10423073348 Scanner tscanner(mb_wc, cs, t, tlen);
1802 10422907406 int s_res = 0;
1803 10422907406 int t_res = 0;
1804
1805 /*
1806 We compare 2 strings in same level first. If only string A's scanner
1807 has gone to next level, which means another string, B's weight of
1808 current level is longer than A's. We'll compare B's remaining weights
1809 with space.
1810 */
1811
1/2
✓ Branch 0 taken 5211497328 times.
✗ Branch 1 not taken.
10422913132 for (uint current_lv = 0; current_lv < LEVELS_FOR_COMPARE; ++current_lv) {
1812 /* Run the scanners until one of them runs out of current lv */
1813 do {
1814
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 182 times.
38171636396 s_res = sscanner.next();
1815 38250937494 t_res = tscanner.next();
1816
4/4
✓ Branch 0 taken 13973876528 times.
✓ Branch 1 taken 403196250 times.
✓ Branch 2 taken 13863831101 times.
✓ Branch 3 taken 90007467 times.
56661822692 } while (s_res == t_res && s_res >= 0 &&
1817
5/6
✓ Branch 0 taken 14377072778 times.
✓ Branch 1 taken 4748395969 times.
✓ Branch 2 taken 13953403909 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13953360344 times.
✓ Branch 5 taken 5211930012 times.
95032479324 sscanner.get_weight_level() == current_lv &&
1818 27727662202 tscanner.get_weight_level() == current_lv);
1819
1820 /*
1821 Two scanners run to next level at same time, or we found a difference,
1822 or we found an error.
1823 */
1824
2/2
✓ Branch 0 taken 5207174308 times.
✓ Branch 1 taken 4710188 times.
10423860024 if (sscanner.get_weight_level() == tscanner.get_weight_level()) {
1825
4/4
✓ Branch 0 taken 366010689 times.
✓ Branch 1 taken 4841163619 times.
✓ Branch 2 taken 2833 times.
✓ Branch 3 taken 366007856 times.
10414348616 if (s_res == t_res && s_res >= 0) continue;
1826 10414342950 break; // Error or inequality found, end.
1827 }
1828
1829
2/2
✓ Branch 0 taken 1852950 times.
✓ Branch 1 taken 2857238 times.
9420376 if (tscanner.get_weight_level() > current_lv) {
1830 // t ran out of weights on this level, and s didn't.
1831
2/2
✓ Branch 0 taken 1638 times.
✓ Branch 1 taken 1851312 times.
3705900 if (t_is_prefix) {
1832 // Consume the rest of the weights from s.
1833 do {
1834 8424 s_res = sscanner.next();
1835
6/6
✓ Branch 0 taken 2604 times.
✓ Branch 1 taken 1608 times.
✓ Branch 2 taken 2574 times.
✓ Branch 3 taken 30 times.
✓ Branch 4 taken 2574 times.
✓ Branch 5 taken 1638 times.
8424 } while (s_res >= 0 && sscanner.get_weight_level() == current_lv);
1836
1837
2/2
✓ Branch 0 taken 1608 times.
✓ Branch 1 taken 30 times.
3276 if (s_res < 0) break; // Error found, end.
1838
1839 // s is now also on the next level. Continue comparison.
1840 60 continue;
1841 } else {
1842 // s is longer than t (and t_prefix isn't set).
1843 3702624 return 1;
1844 }
1845 }
1846
1847
1/2
✓ Branch 0 taken 2857238 times.
✗ Branch 1 not taken.
5714476 if (sscanner.get_weight_level() > current_lv) {
1848 // s ran out of weights on this level, and t didn't.
1849 5714476 return -1;
1850 }
1851
1852 break;
1853 }
1854
1855 10414264642 return (s_res - t_res);
1856 }
1857
1858 974136 static inline int my_space_weight(const CHARSET_INFO *cs) /* W3-TODO */
1859 {
1860
2/4
✓ Branch 0 taken 974136 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 974136 times.
974136 if (cs->uca && cs->uca->version == UCA_V900)
1861 return UCA900_WEIGHT(cs->uca->weights[0], /*weight_lv=*/0, 0x20);
1862 else
1863 974136 return cs->uca->weights[0][0x20 * cs->uca->lengths[0]];
1864 }
1865
1866 /**
1867 Helper function:
1868 Find address of weights of the given code point.
1869
1870 @param uca Pointer to UCA data
1871 @param wc character Unicode code point
1872
1873 @return Weight array
1874 @retval pointer to weight array for the given code point,
1875 or nullptr if this page does not have implicit weights.
1876 */
1877
1878 1018732 static inline uint16 *my_char_weight_addr(MY_UCA_INFO *uca, my_wc_t wc) {
1879 uint page, ofst;
1880
1/2
✓ Branch 0 taken 1018732 times.
✗ Branch 1 not taken.
2037464 return wc > uca->maxchar ? nullptr
1881 1018732 : (uca->weights[page = (wc >> 8)]
1882
1/2
✓ Branch 0 taken 1018732 times.
✗ Branch 1 not taken.
1018732 ? uca->weights[page] + (ofst = (wc & 0xFF)) *
1883 1018732 uca->lengths[page]
1884 1018732 : nullptr);
1885 }
1886
1887 /**
1888 Helper function:
1889 Find address of weights of the given code point, for UCA 9.0.0 format.
1890
1891 @param uca Pointer to UCA data
1892 @param wc character Unicode code point
1893
1894 @return Weight array
1895 @retval pointer to weight array for the given code point,
1896 or nullptr if this page does not have implicit weights.
1897 */
1898
1899 296798798 static inline uint16 *my_char_weight_addr_900(MY_UCA_INFO *uca, my_wc_t wc) {
1900
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 296798798 times.
296798798 if (wc > uca->maxchar) return nullptr;
1901
1902 296798798 uint page = wc >> 8;
1903 296798798 uint ofst = wc & 0xFF;
1904 296798798 uint16 *weights = uca->weights[page];
1905
2/2
✓ Branch 0 taken 296795184 times.
✓ Branch 1 taken 3614 times.
296798798 if (weights)
1906 296795184 return UCA900_WEIGHT_ADDR(weights, /*level=*/0, ofst);
1907 else
1908 3614 return nullptr;
1909 }
1910
1911 /*
1912 Compares two strings according to the collation,
1913 ignoring trailing spaces.
1914
1915 SYNOPSIS:
1916 my_strnncollsp_uca()
1917 cs Character set information
1918 s First string
1919 slen First string length
1920 t Second string
1921 tlen Second string length
1922
1923 NOTES:
1924 Works exactly the same with my_strnncoll_uca(),
1925 but ignores trailing spaces.
1926
1927 In the while() comparison these situations are possible:
1928 1. (s_res>0) and (t_res>0) and (s_res == t_res)
1929 Weights are the same so far, continue comparison
1930 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
1931 A difference has been found, return.
1932 3. (s_res>0) and (t_res<0)
1933 We have reached the end of the second string, or found
1934 an illegal multibyte sequence in the second string.
1935 Compare the first string to an infinite array of
1936 space characters until difference is found, or until
1937 the end of the first string.
1938 4. (s_res<0) and (t_res>0)
1939 We have reached the end of the first string, or found
1940 an illegal multibyte sequence in the first string.
1941 Compare the second string to an infinite array of
1942 space characters until difference is found or until
1943 the end of the second string.
1944 5. (s_res<0) and (t_res<0)
1945 Both scanners returned -1. It means we have reached
1946 the end-of-string or an illegal sequence in both strings
1947 at the same time. Return 0, strings are equal.
1948
1949 RETURN
1950 Difference between two strings, according to the collation:
1951 0 - means strings are equal
1952 negative number - means the first string is smaller
1953 positive number - means the first string is bigger
1954 */
1955
1956 template <class Mb_wc>
1957 553356 static int my_strnncollsp_uca(const CHARSET_INFO *cs, Mb_wc mb_wc,
1958 const uchar *s, size_t slen, const uchar *t,
1959 size_t tlen) {
1960 int s_res, t_res;
1961
1962 553356 uca_scanner_any<Mb_wc> sscanner(mb_wc, cs, s, slen);
1963 553366 uca_scanner_any<Mb_wc> tscanner(mb_wc, cs, t, tlen);
1964
1965 do {
1966
2/2
✓ Branch 0 taken 105108 times.
✓ Branch 1 taken 7728108 times.
15666432 s_res = sscanner.next();
1967 15667062 t_res = tscanner.next();
1968
4/4
✓ Branch 0 taken 7805855 times.
✓ Branch 1 taken 27676 times.
✓ Branch 2 taken 7556839 times.
✓ Branch 3 taken 249016 times.
15667062 } while (s_res == t_res && s_res > 0);
1969
1970
4/4
✓ Branch 0 taken 21618 times.
✓ Branch 1 taken 255074 times.
✓ Branch 2 taken 438 times.
✓ Branch 3 taken 21180 times.
553384 if (s_res > 0 && t_res < 0) {
1971 /* Calculate weight for SPACE character */
1972 876 t_res = my_space_weight(cs);
1973
1974 /* compare the first string to spaces */
1975 do {
1976
2/2
✓ Branch 0 taken 201 times.
✓ Branch 1 taken 376 times.
1154 if (s_res != t_res) return (s_res - t_res);
1977 752 s_res = sscanner.next();
1978
2/2
✓ Branch 0 taken 139 times.
✓ Branch 1 taken 237 times.
752 } while (s_res > 0);
1979 474 return 0;
1980 }
1981
1982
4/4
✓ Branch 0 taken 255075 times.
✓ Branch 1 taken 21179 times.
✓ Branch 2 taken 6015 times.
✓ Branch 3 taken 249060 times.
552508 if (s_res < 0 && t_res > 0) {
1983 /* Calculate weight for SPACE character */
1984 12030 s_res = my_space_weight(cs);
1985
1986 /* compare the second string to spaces */
1987 do {
1988
2/2
✓ Branch 0 taken 5577 times.
✓ Branch 1 taken 1448 times.
14050 if (s_res != t_res) return (s_res - t_res);
1989 2896 t_res = tscanner.next();
1990
2/2
✓ Branch 0 taken 1010 times.
✓ Branch 1 taken 438 times.
2896 } while (t_res > 0);
1991 876 return 0;
1992 }
1993
1994 540478 return (s_res - t_res);
1995 }
1996
1997 /*
1998 Calculates hash value for the given string,
1999 according to the collation, and ignoring trailing spaces.
2000
2001 SYNOPSIS:
2002 my_hash_sort_uca()
2003 cs Character set information
2004 s String
2005 slen String's length
2006 n1 First hash parameter
2007 n2 Second hash parameter
2008
2009 NOTES:
2010 Scans weights consecutively and updates
2011 hash parameters n1 and n2. In a case insensitive collation,
2012 upper and lower case of the same letter will return the same
2013 weight sequence, and thus will produce the same hash values
2014 in n1 and n2.
2015
2016 RETURN
2017 N/A
2018 */
2019
2020 template <class Mb_wc>
2021 5568 static void my_hash_sort_uca(const CHARSET_INFO *cs, Mb_wc mb_wc,
2022 const uchar *s, size_t slen, uint64 *n1,
2023 uint64 *n2) {
2024 int s_res;
2025 uint64 tmp1;
2026 uint64 tmp2;
2027
2028
1/2
✓ Branch 0 taken 2784 times.
✗ Branch 1 not taken.
5568 slen = cs->cset->lengthsp(cs, pointer_cast<const char *>(s), slen);
2029 5568 uca_scanner_any<Mb_wc> scanner(mb_wc, cs, s, slen);
2030
2031 5568 tmp1 = *n1;
2032 5568 tmp2 = *n2;
2033
2034
2/2
✓ Branch 0 taken 126188 times.
✓ Branch 1 taken 2784 times.
263512 while ((s_res = scanner.next()) > 0) {
2035 252376 tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res >> 8)) + (tmp1 << 8);
2036 252376 tmp2 += 3;
2037 252376 tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res & 0xFF)) + (tmp1 << 8);
2038 252376 tmp2 += 3;
2039 }
2040
2041 5568 *n1 = tmp1;
2042 5568 *n2 = tmp2;
2043 5568 }
2044
2045 /*
2046 For the given string creates its "binary image", suitable
2047 to be used in binary comparison, i.e. in memcmp().
2048
2049 SYNOPSIS:
2050 my_strnxfrm_uca()
2051 cs Character set information
2052 dst Where to write the image
2053 dstlen Space available for the image, in bytes
2054 src The source string
2055 srclen Length of the source string, in bytes
2056
2057 NOTES:
2058 In a loop, scans weights from the source string and writes
2059 them into the binary image. In a case insensitive collation,
2060 upper and lower cases of the same letter will produce the
2061 same image subsequences. When we have reached the end-of-string
2062 or found an illegal multibyte sequence, the loop stops.
2063
2064 It is impossible to restore the original string using its
2065 binary image.
2066
2067 Binary images are used for bulk comparison purposes,
2068 e.g. in ORDER BY, when it is more efficient to create
2069 a binary image and use it instead of weight scanner
2070 for the original strings for every comparison.
2071
2072 RETURN
2073 Number of bytes that have been written into the binary image.
2074 */
2075
2076 template <class Mb_wc>
2077 507239098 static size_t my_strnxfrm_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, uchar *dst,
2078 size_t dstlen, uint num_codepoints,
2079 const uchar *src, size_t srclen, uint flags) {
2080 507239098 uchar *d0 = dst;
2081 507239098 uchar *de = dst + dstlen;
2082 int s_res;
2083 507239098 uca_scanner_any<Mb_wc> scanner(mb_wc, cs, src, srclen);
2084
2085
6/6
✓ Branch 0 taken 540461295 times.
✓ Branch 1 taken 248 times.
✓ Branch 2 taken 286841994 times.
✓ Branch 3 taken 253619301 times.
✓ Branch 4 taken 286841994 times.
✓ Branch 5 taken 253619549 times.
2161845676 while (dst < de && (s_res = scanner.next()) > 0) {
2086 573683988 *dst++ = s_res >> 8;
2087
1/2
✓ Branch 0 taken 286841994 times.
✗ Branch 1 not taken.
573683988 if (dst < de) *dst++ = s_res & 0xFF;
2088 }
2089
2090
2/2
✓ Branch 0 taken 253619301 times.
✓ Branch 1 taken 248 times.
507239098 if (dst < de) {
2091 /*
2092 PAD SPACE behavior.
2093
2094 We still have space left in the output buffer, which must mean
2095 that the scanner is at the end of the last level. Find out
2096 how many weights we wrote per level, and add any remaining
2097 spaces we need to get us up to the requested total.
2098 */
2099
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 253619301 times.
507238602 assert(num_codepoints >= scanner.get_char_index());
2100 507238602 num_codepoints -= scanner.get_char_index();
2101
2102
2/2
✓ Branch 0 taken 627887 times.
✓ Branch 1 taken 252991414 times.
507238602 if (num_codepoints) {
2103 1255774 uint space_count = std::min<uint>((de - dst) / 2, num_codepoints);
2104 1255774 s_res = my_space_weight(cs);
2105
2/2
✓ Branch 0 taken 1783514 times.
✓ Branch 1 taken 627887 times.
4822802 for (; space_count; space_count--) {
2106 7134056 dst = store16be(dst, s_res);
2107 }
2108 }
2109 }
2110
4/4
✓ Branch 0 taken 340005 times.
✓ Branch 1 taken 253279544 times.
✓ Branch 2 taken 339796 times.
✓ Branch 3 taken 209 times.
507239098 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) {
2111 679592 s_res = my_space_weight(cs);
2112
2/2
✓ Branch 0 taken 23797171 times.
✓ Branch 1 taken 339796 times.
48273934 for (; dst < de;) {
2113 47594342 *dst++ = s_res >> 8;
2114
1/2
✓ Branch 0 taken 23797171 times.
✗ Branch 1 not taken.
47594342 if (dst < de) *dst++ = s_res & 0xFF;
2115 }
2116 }
2117 507239098 return dst - d0;
2118 }
2119
2120 148148069 static int my_uca_charcmp_900(const CHARSET_INFO *cs, my_wc_t wc1,
2121 my_wc_t wc2) {
2122 148148069 uint16 *weight1_ptr = my_char_weight_addr_900(cs->uca, wc1); /* W3-TODO */
2123 148148069 uint16 *weight2_ptr = my_char_weight_addr_900(cs->uca, wc2);
2124
2125 /* Check if either of the characters does not have implicit weights */
2126
4/4
✓ Branch 0 taken 148146934 times.
✓ Branch 1 taken 1135 times.
✓ Branch 2 taken 1812 times.
✓ Branch 3 taken 148145122 times.
148148069 if (!weight1_ptr || !weight2_ptr) return wc1 != wc2;
2127
2128
5/6
✓ Branch 0 taken 148144905 times.
✓ Branch 1 taken 217 times.
✓ Branch 2 taken 148144905 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 146229511 times.
✓ Branch 5 taken 1915394 times.
148145122 if (weight1_ptr[0] && weight2_ptr[0] && weight1_ptr[0] != weight2_ptr[0])
2129 146229511 return 1;
2130
2131 /* Thoroughly compare all weights */
2132 1915611 size_t length1 = weight1_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS];
2133 1915611 size_t length2 = weight2_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS];
2134
2135
2/2
✓ Branch 0 taken 1915708 times.
✓ Branch 1 taken 1915364 times.
3831072 for (int level = 0; level < cs->levels_for_compare; ++level) {
2136 1915708 size_t wt_ind1 = 0;
2137 1915708 size_t wt_ind2 = 0;
2138 1915708 uint16 *weight1 = weight1_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS;
2139 1915708 uint16 *weight2 = weight2_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS;
2140
4/4
✓ Branch 0 taken 1915856 times.
✓ Branch 1 taken 1915385 times.
✓ Branch 2 taken 1915814 times.
✓ Branch 3 taken 42 times.
3831241 while (wt_ind1 < length1 && wt_ind2 < length2) {
2141 // Zero weight is ignorable.
2142
4/4
✓ Branch 0 taken 1915814 times.
✓ Branch 1 taken 272 times.
✓ Branch 2 taken 272 times.
✓ Branch 3 taken 1915542 times.
1916086 for (; wt_ind1 < length1 && !*weight1; wt_ind1++)
2143 272 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2144
2/2
✓ Branch 0 taken 272 times.
✓ Branch 1 taken 1915542 times.
1915814 if (wt_ind1 == length1) break;
2145
2/4
✓ Branch 0 taken 1915542 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1915542 times.
1915542 for (; wt_ind2 < length2 && !*weight2; wt_ind2++)
2146 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1915542 times.
1915542 if (wt_ind2 == length2) break;
2148
2149 // Check if these two non-ignorable weights are equal.
2150
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1915533 times.
1915542 if (*weight1 != *weight2) return 1;
2151 1915533 wt_ind1++;
2152 1915533 wt_ind2++;
2153 1915533 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2154 1915533 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2155 }
2156 /*
2157 If either character is out of weights but we have equality so far,
2158 check if the other character has any non-ignorable weights left.
2159 */
2160
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 1915693 times.
1915735 for (; wt_ind1 < length1; wt_ind1++) {
2161
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
42 if (*weight1) return 1;
2162 36 weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2163 }
2164
2/2
✓ Branch 0 taken 305 times.
✓ Branch 1 taken 1915461 times.
1915766 for (; wt_ind2 < length2; wt_ind2++) {
2165
2/2
✓ Branch 0 taken 232 times.
✓ Branch 1 taken 73 times.
305 if (*weight2) return 1;
2166 73 weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS;
2167 }
2168 }
2169 1915364 return 0;
2170 }
2171
2172 /*
2173 This function compares if two code points are the same.
2174 The sign +1 or -1 does not matter. The only
2175 important thing is that the result is 0 or not 0.
2176 This fact allows us to use memcmp() safely, on both
2177 little-endian and big-endian machines.
2178 */
2179
2180 182086189 static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) {
2181
2/2
✓ Branch 0 taken 33936011 times.
✓ Branch 1 taken 148150178 times.
182086189 if (wc1 == wc2) return 0;
2182
2183
3/4
✓ Branch 0 taken 148150178 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 148148069 times.
✓ Branch 3 taken 2109 times.
148150178 if (cs->uca != nullptr && cs->uca->version == UCA_V900)
2184 148148069 return my_uca_charcmp_900(cs, wc1, wc2);
2185
2186 size_t length1, length2;
2187 2109 uint16 *weight1 = my_char_weight_addr(cs->uca, wc1); /* W3-TODO */
2188 2109 uint16 *weight2 = my_char_weight_addr(cs->uca, wc2);
2189
2190 /* Check if either of the code points does not have implicit weights */
2191
2/4
✓ Branch 0 taken 2109 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2109 times.
2109 if (!weight1 || !weight2) return wc1 != wc2;
2192
2193 /* Quickly compare first weights */
2194
2/2
✓ Branch 0 taken 2073 times.
✓ Branch 1 taken 36 times.
2109 if (weight1[0] != weight2[0]) return 1;
2195
2196 /* Thoroughly compare all weights */
2197 36 length1 = cs->uca->lengths[wc1 >> MY_UCA_PSHIFT]; /* W3-TODO */
2198 36 length2 = cs->uca->lengths[wc2 >> MY_UCA_PSHIFT];
2199
2200
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (length1 > length2)
2201 return memcmp((const void *)weight1, (const void *)weight2, length2 * 2)
2202 ? 1
2203 : weight1[length2];
2204
2205
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (length1 < length2)
2206 return memcmp((const void *)weight1, (const void *)weight2, length1 * 2)
2207 ? 1
2208 : weight2[length1];
2209
2210 36 return memcmp((const void *)weight1, (const void *)weight2, length1 * 2);
2211 }
2212
2213 /*** Compare string against string with wildcard
2214 ** 0 if matched
2215 ** -1 if not matched with wildcard
2216 ** 1 if matched with wildcard
2217 */
2218
2219 131829251 static int my_wildcmp_uca_impl(const CHARSET_INFO *cs, const char *str,
2220 const char *str_end, const char *wildstr,
2221 const char *wildend, int escape, int w_one,
2222 int w_many, int recurse_level) {
2223
4/6
✓ Branch 0 taken 131828825 times.
✓ Branch 1 taken 426 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 131828825 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 131829251 times.
131829251 if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return 1;
2224
2/2
✓ Branch 0 taken 131828585 times.
✓ Branch 1 taken 666 times.
131829251 while (wildstr != wildend) {
2225 131828585 int result = -1; /* Not found, using wildcards */
2226 131828585 auto mb_wc = cs->cset->mb_wc;
2227
2228 /*
2229 Compare the expression and pattern strings character-by-character until
2230 we find a '%' (w_many) in the pattern string. Once we do, we break out
2231 of the loop and try increasingly large widths for the '%' match,
2232 calling ourselves recursively until we find a match. (As an
2233 optimization, we test for the character immediately after '%' before we
2234 recurse.) This takes exponential time in the worst case.
2235
2236 Example: Say we are trying to match the pattern 'ab%cd' against the
2237 string 'ab..c.cd'. We first match the initial 'ab' against each other,
2238 and then see the '%' in the pattern. Since the first character after
2239 '%' is 'c', we skip to the first 'c' in the expression string, and try
2240 to match 'c.cd' against 'cd' by a recursive call. Since this failed, we
2241 scan for the next 'c', and try to match 'cd' against 'cd', which works.
2242 */
2243 my_wc_t w_wc;
2244 while (true) {
2245 int mb_len;
2246
1/2
✓ Branch 0 taken 168104851 times.
✗ Branch 1 not taken.
168104851 if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr,
2247
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 168104851 times.
168104851 (const uchar *)wildend)) <= 0)
2248 130221051 return 1;
2249
2250 168104851 wildstr += mb_len;
2251 // If we found '%' (w_many), break out this loop.
2252
2/2
✓ Branch 0 taken 1607534 times.
✓ Branch 1 taken 166497317 times.
168104851 if (w_wc == (my_wc_t)w_many) {
2253 1607534 result = 1;
2254 1607534 break;
2255 }
2256
2257 /*
2258 If the character we just read was an escape character, skip it and
2259 read the next character instead. This character is used verbatim
2260 without checking if it is a wildcard (% or _). However, as a
2261 special exception, a lone escape character at the end of a string is
2262 treated as itself.
2263 */
2264 166497317 bool escaped = false;
2265
4/4
✓ Branch 0 taken 7536 times.
✓ Branch 1 taken 166489781 times.
✓ Branch 2 taken 7522 times.
✓ Branch 3 taken 14 times.
166497317 if (w_wc == (my_wc_t)escape && wildstr < wildend) {
2266
1/2
✓ Branch 0 taken 7522 times.
✗ Branch 1 not taken.
7522 if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr,
2267
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7522 times.
7522 (const uchar *)wildend)) <= 0)
2268 return 1;
2269 7522 wildstr += mb_len;
2270 7522 escaped = true;
2271 }
2272
2273 my_wc_t s_wc;
2274
1/2
✓ Branch 0 taken 166497317 times.
✗ Branch 1 not taken.
166497317 if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str,
2275
2/2
✓ Branch 0 taken 17541 times.
✓ Branch 1 taken 166479776 times.
166497317 (const uchar *)str_end)) <= 0)
2276 17541 return 1;
2277 166479776 str += mb_len;
2278
2279 // If we found '_' (w_one), skip one character in expression string.
2280
4/4
✓ Branch 0 taken 166472270 times.
✓ Branch 1 taken 7506 times.
✓ Branch 2 taken 2381496 times.
✓ Branch 3 taken 164090774 times.
166479776 if (!escaped && w_wc == (my_wc_t)w_one) {
2281 2381496 result = 1;
2282 } else {
2283
2/2
✓ Branch 0 taken 129958894 times.
✓ Branch 1 taken 34139386 times.
164098280 if (my_uca_charcmp(cs, s_wc, w_wc)) return 1;
2284 }
2285
2/2
✓ Branch 0 taken 244616 times.
✓ Branch 1 taken 36276266 times.
36520882 if (wildstr == wildend)
2286 244616 return (str != str_end); /* Match if both are at end */
2287 36276266 }
2288
2289
1/2
✓ Branch 0 taken 1607534 times.
✗ Branch 1 not taken.
1607534 if (w_wc == (my_wc_t)w_many) {
2290 // Remove any '%' and '_' following w_many in the pattern string.
2291 for (;;) {
2292
2/2
✓ Branch 0 taken 534622 times.
✓ Branch 1 taken 1086683 times.
1621305 if (wildstr == wildend) {
2293 /*
2294 The previous w_many (%) was the last character in the pattern
2295 string, so we have a match no matter what the rest of the
2296 expression string looks like (even empty).
2297 */
2298 534622 return 0;
2299 }
2300 int mb_len_wild =
2301
1/2
✓ Branch 0 taken 1086683 times.
✗ Branch 1 not taken.
1086683 mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend);
2302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1086683 times.
1086683 if (mb_len_wild <= 0) return 1;
2303 1086683 wildstr += mb_len_wild;
2304
2/2
✓ Branch 0 taken 2601 times.
✓ Branch 1 taken 1084082 times.
1086683 if (w_wc == (my_wc_t)w_many) continue;
2305
2306
2/2
✓ Branch 0 taken 11170 times.
✓ Branch 1 taken 1072912 times.
1084082 if (w_wc == (my_wc_t)w_one) {
2307 /*
2308 Skip one character in expression string because '_' needs to
2309 match one.
2310 */
2311 my_wc_t s_wc;
2312 int mb_len =
2313
1/2
✓ Branch 0 taken 11170 times.
✗ Branch 1 not taken.
11170 mb_wc(cs, &s_wc, (const uchar *)str, (const uchar *)str_end);
2314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11170 times.
11170 if (mb_len <= 0) return 1;
2315 11170 str += mb_len;
2316 11170 continue;
2317 11170 }
2318 1072912 break; /* Not a wild character */
2319 13771 }
2320
2321 // No character in the expression string to match w_wc.
2322
2/2
✓ Branch 0 taken 628 times.
✓ Branch 1 taken 1072284 times.
1072912 if (str == str_end) return -1;
2323
2324 // Skip the escape character ('\') in the pattern if needed.
2325
4/4
✓ Branch 0 taken 126 times.
✓ Branch 1 taken 1072158 times.
✓ Branch 2 taken 113 times.
✓ Branch 3 taken 13 times.
1072284 if (w_wc == (my_wc_t)escape && wildstr < wildend) {
2326 int mb_len =
2327
1/2
✓ Branch 0 taken 113 times.
✗ Branch 1 not taken.
113 mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend);
2328
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 113 times.
113 if (mb_len <= 0) return 1;
2329 113 wildstr += mb_len;
2330 }
2331
2332 /*
2333 w_wc is now the character following w_many (e.g., if the pattern is
2334 "a%c", w_wc is 'c').
2335 */
2336 while (true) {
2337 /*
2338 Skip until we find a character in the expression string that is
2339 equal to w_wc.
2340 */
2341 2540992 int mb_len = 0;
2342
2/2
✓ Branch 0 taken 17987909 times.
✓ Branch 1 taken 828975 times.
18816884 while (str != str_end) {
2343 my_wc_t s_wc;
2344
1/2
✓ Branch 0 taken 17987909 times.
✗ Branch 1 not taken.
17987909 if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str,
2345
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17987909 times.
17987909 (const uchar *)str_end)) <= 0)
2346 return 1;
2347
2348
2/2
✓ Branch 0 taken 1712017 times.
✓ Branch 1 taken 16275892 times.
17987909 if (!my_uca_charcmp(cs, s_wc, w_wc)) break;
2349 16275892 str += mb_len;
2350 }
2351 // No character in the expression string is equal to w_wc.
2352
2/2
✓ Branch 0 taken 828975 times.
✓ Branch 1 taken 1712017 times.
2540992 if (str == str_end) return -1;
2353 1712017 str += mb_len;
2354
2355 /*
2356 The strings match up until the first character after w_many in the
2357 pattern string. For the rest part of pattern string and expression
2358 string, we recursively call to get wild compare result.
2359 Example, wildcmp(..., "abcdefg", "a%de%g", ...), we'll run again on
2360 wildcmp(..., "efg", "e%g", ...).
2361 */
2362
1/2
✓ Branch 0 taken 1712017 times.
✗ Branch 1 not taken.
1712017 result = my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape,
2363 w_one, w_many, recurse_level + 1);
2364
2365
2/2
✓ Branch 0 taken 243309 times.
✓ Branch 1 taken 1468708 times.
1712017 if (result <= 0) return result;
2366 1468708 }
2367 }
2368 }
2369
2/2
✓ Branch 0 taken 588 times.
✓ Branch 1 taken 78 times.
666 return (str != str_end ? 1 : 0);
2370 }
2371
2372 15962 static int my_strcasecmp_uca(const CHARSET_INFO *cs, const char *s,
2373 const char *t) {
2374 15962 const MY_UNICASE_INFO *uni_plane = cs->caseinfo;
2375 const MY_UNICASE_CHARACTER *page;
2376
4/4
✓ Branch 0 taken 34282 times.
✓ Branch 1 taken 3075 times.
✓ Branch 2 taken 33823 times.
✓ Branch 3 taken 459 times.
37357 while (s[0] && t[0]) {
2377 my_wc_t s_wc, t_wc;
2378
2379
1/2
✓ Branch 0 taken 33823 times.
✗ Branch 1 not taken.
33823 if (static_cast<uchar>(s[0]) < 128) {
2380 33823 s_wc = uni_plane->page[0][static_cast<uchar>(s[0])].tolower;
2381 33823 s++;
2382 } else {
2383 int res;
2384
2385 res = cs->cset->mb_wc(cs, &s_wc, pointer_cast<const uchar *>(s),
2386 pointer_cast<const uchar *>(s + 4));
2387
2388
0/2
✗ Branch 0 not taken.
✗ Branch 1 not taken.
12428 if (res <= 0) return strcmp(s, t);
2389 s += res;
2390 if (s_wc <= uni_plane->maxchar && (page = uni_plane->page[s_wc >> 8]))
2391 s_wc = page[s_wc & 0xFF].tolower;
2392 }
2393
2394 /* Do the same for the second string */
2395
2396
1/2
✓ Branch 0 taken 33823 times.
✗ Branch 1 not taken.
33823 if (static_cast<uchar>(t[0]) < 128) {
2397 /* Convert single byte character into weight */
2398 33823 t_wc = uni_plane->page[0][static_cast<uchar>(t[0])].tolower;
2399 33823 t++;
2400 } else {
2401 int res = cs->cset->mb_wc(cs, &t_wc, pointer_cast<const uchar *>(t),
2402 pointer_cast<const uchar *>(t + 4));
2403 if (res <= 0) return strcmp(s, t);
2404 t += res;
2405
2406 if (t_wc <= uni_plane->maxchar && (page = uni_plane->page[t_wc >> 8]))
2407 t_wc = page[t_wc & 0xFF].tolower;
2408 }
2409
2410 /* Now we have two weights, let's compare them */
2411
2/2
✓ Branch 0 taken 12428 times.
✓ Branch 1 taken 21395 times.
33823 if (s_wc != t_wc) return static_cast<int>(s_wc) - static_cast<int>(t_wc);
2412 }
2413 3534 return static_cast<int>(static_cast<uchar>(s[0])) -
2414 3534 static_cast<int>(static_cast<uchar>(t[0]));
2415 }
2416
2417 extern "C" {
2418 130117234 static int my_wildcmp_uca(const CHARSET_INFO *cs, const char *str,
2419 const char *str_end, const char *wildstr,
2420 const char *wildend, int escape, int w_one,
2421 int w_many) {
2422 130117234 return my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, w_one,
2423 130117234 w_many, 1);
2424 }
2425 } // extern "C"
2426
2427 /*
2428 Collation language is implemented according to
2429 subset of ICU Collation Customization (tailorings):
2430 http://icu.sourceforge.net/userguide/Collate_Customization.html
2431
2432 Collation language elements:
2433 Delimiters:
2434 space - skipped
2435
2436 <char> := A-Z | a-z | \uXXXX
2437
2438 Shift command:
2439 <shift> := & - reset at this letter.
2440
2441 Diff command:
2442 <d1> := < - Identifies a primary difference.
2443 <d2> := << - Identifies a secondary difference.
2444 <d3> := <<< - Identifies a tertiary difference.
2445
2446
2447 Collation rules:
2448 <ruleset> := <rule> { <ruleset> }
2449
2450 <rule> := <d1> <string>
2451 | <d2> <string>
2452 | <d3> <string>
2453 | <shift> <char>
2454
2455 <string> := <char> [ <string> ]
2456
2457 An example, Polish collation:
2458
2459 &A < \u0105 <<< \u0104
2460 &C < \u0107 <<< \u0106
2461 &E < \u0119 <<< \u0118
2462 &L < \u0142 <<< \u0141
2463 &N < \u0144 <<< \u0143
2464 &O < \u00F3 <<< \u00D3
2465 &S < \u015B <<< \u015A
2466 &Z < \u017A <<< \u017B
2467 */
2468
2469 typedef enum my_coll_lexem_num_en {
2470 MY_COLL_LEXEM_EOF = 0,
2471 MY_COLL_LEXEM_SHIFT = 1,
2472 MY_COLL_LEXEM_RESET = 4,
2473 MY_COLL_LEXEM_CHAR = 5,
2474 MY_COLL_LEXEM_ERROR = 6,
2475 MY_COLL_LEXEM_OPTION = 7,
2476 MY_COLL_LEXEM_EXTEND = 8,
2477 MY_COLL_LEXEM_CONTEXT = 9
2478 } my_coll_lexem_num;
2479
2480 /**
2481 Convert collation customization lexem to string,
2482 for nice error reporting
2483
2484 @param term lexem code
2485
2486 @return lexem name
2487 */
2488
2489 1 static const char *my_coll_lexem_num_to_str(my_coll_lexem_num term) {
2490
1/9
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
1 switch (term) {
2491 case MY_COLL_LEXEM_EOF:
2492 return "EOF";
2493 1 case MY_COLL_LEXEM_SHIFT:
2494 1 return "Shift";
2495 case MY_COLL_LEXEM_RESET:
2496 return "&";
2497 case MY_COLL_LEXEM_CHAR:
2498 return "Character";
2499 case MY_COLL_LEXEM_OPTION:
2500 return "Bracket option";
2501 case MY_COLL_LEXEM_EXTEND:
2502 return "/";
2503 case MY_COLL_LEXEM_CONTEXT:
2504 return "|";
2505 case MY_COLL_LEXEM_ERROR:
2506 return "ERROR";
2507 }
2508 return nullptr;
2509 }
2510
2511 struct MY_COLL_LEXEM {
2512 my_coll_lexem_num term;
2513 const char *beg;
2514 const char *end;
2515 const char *prev;
2516 int diff;
2517 int code;
2518 };
2519
2520 /*
2521 Initialize collation rule lexical analyzer
2522
2523 SYNOPSIS
2524 my_coll_lexem_init
2525 lexem Lex analyzer to init
2526 str Const string to parse
2527 str_end End of the string
2528 USAGE
2529
2530 RETURN VALUES
2531 N/A
2532 */
2533
2534 29009 static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, const char *str,
2535 const char *str_end) {
2536 29009 lexem->beg = str;
2537 29009 lexem->prev = str;
2538 29009 lexem->end = str_end;
2539 29009 lexem->diff = 0;
2540 29009 lexem->code = 0;
2541 29009 }
2542
2543 /**
2544 Compare lexem to string with length
2545
2546 @param lexem lexem
2547 @param pattern string
2548 @param patternlen string length
2549
2550 @retval 0 if lexem is equal to string, non-0 otherwise.
2551 */
2552
2553 125963 static int lex_cmp(MY_COLL_LEXEM *lexem, const char *pattern,
2554 size_t patternlen) {
2555 125963 size_t lexemlen = lexem->beg - lexem->prev;
2556
2/2
✓ Branch 0 taken 62805 times.
✓ Branch 1 taken 63158 times.
125963 if (lexemlen < patternlen) return 1; /* Not a prefix */
2557 63158 return native_strncasecmp(lexem->prev, pattern, patternlen);
2558 }
2559
2560 /*
2561 Print collation customization expression parse error, with context.
2562
2563 SYNOPSIS
2564 my_coll_lexem_print_error
2565 lexem Lex analyzer to take context from
2566 errstr string to write error to
2567 errsize errstr size
2568 txt error message
2569 col_name collation name
2570 USAGE
2571
2572 RETURN VALUES
2573 N/A
2574 */
2575
2576 3 static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, char *errstr,
2577 size_t errsize, const char *txt,
2578 const char *col_name) {
2579 char tail[30];
2580 3 size_t len = lexem->end - lexem->prev;
2581
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 strmake(tail, lexem->prev, std::min(len, sizeof(tail) - 1));
2582 3 errstr[errsize - 1] = '\0';
2583 3 snprintf(errstr, errsize - 1, "%s at '%s' for COLLATION : %s",
2584
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 txt[0] ? txt : "Syntax error", tail, col_name);
2585 3 }
2586
2587 /*
2588 Convert a hex digit into its numeric value
2589
2590 SYNOPSIS
2591 ch2x
2592 ch hex digit to convert
2593 USAGE
2594
2595 RETURN VALUES
2596 an integer value in the range 0..15
2597 -1 on error
2598 */
2599
2600 6647976 static int ch2x(int ch) {
2601
4/4
✓ Branch 0 taken 5881834 times.
✓ Branch 1 taken 766142 times.
✓ Branch 2 taken 3574586 times.
✓ Branch 3 taken 2307248 times.
6647976 if (ch >= '0' && ch <= '9') return ch - '0';
2602
2603
4/4
✓ Branch 0 taken 208941 times.
✓ Branch 1 taken 2864449 times.
✓ Branch 2 taken 4482 times.
✓ Branch 3 taken 204459 times.
3073390 if (ch >= 'a' && ch <= 'f') return 10 + ch - 'a';
2604
2605
4/4
✓ Branch 0 taken 1967559 times.
✓ Branch 1 taken 1101349 times.
✓ Branch 2 taken 1752102 times.
✓ Branch 3 taken 215457 times.
3068908 if (ch >= 'A' && ch <= 'F') return 10 + ch - 'A';
2606
2607 1316806 return -1;
2608 }
2609
2610 /*
2611 Collation language lexical parser:
2612 Scans the next lexem.
2613
2614 SYNOPSIS
2615 my_coll_lexem_next
2616 lexem Lex analyzer, previously initialized by
2617 my_coll_lexem_init.
2618 USAGE
2619 Call this function in a loop
2620
2621 RETURN VALUES
2622 Lexem number: eof, diff, shift, char or error.
2623 */
2624
2625 3480019 static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) {
2626 const char *beg;
2627 my_coll_lexem_num rc;
2628
2629
2/2
✓ Branch 0 taken 5070598 times.
✓ Branch 1 taken 87022 times.
5157620 for (beg = lexem->beg; beg < lexem->end; beg++) {
2630
8/8
✓ Branch 0 taken 1677601 times.
✓ Branch 1 taken 25980 times.
✓ Branch 2 taken 293472 times.
✓ Branch 3 taken 267214 times.
✓ Branch 4 taken 56538 times.
✓ Branch 5 taken 204479 times.
✓ Branch 6 taken 795021 times.
✓ Branch 7 taken 1750293 times.
5070598 switch (*beg) {
2631 1677601 case ' ':
2632 case '\t':
2633 case '\r':
2634 case '\n':
2635 1677601 continue;
2636
2637 25980 case '[': /* Bracket expression, e.g. "[optimize [a-z]]" */
2638 {
2639 size_t nbrackets; /* Indicates nested recursion level */
2640
1/2
✓ Branch 0 taken 235013 times.
✗ Branch 1 not taken.
235013 for (beg++, nbrackets = 1; beg < lexem->end; beg++) {
2641
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 235013 times.
235013 if (*beg == '[') /* Enter nested bracket expression */
2642 nbrackets++;
2643
2/2
✓ Branch 0 taken 25980 times.
✓ Branch 1 taken 209033 times.
235013 else if (*beg == ']') {
2644
1/2
✓ Branch 0 taken 25980 times.
✗ Branch 1 not taken.
25980 if (--nbrackets == 0) {
2645 25980 rc = MY_COLL_LEXEM_OPTION;
2646 25980 beg++;
2647 25980 goto ex;
2648 }
2649 }
2650 }
2651 rc = MY_COLL_LEXEM_ERROR;
2652 goto ex;
2653 }
2654
2655 293472 case '&':
2656 293472 beg++;
2657 293472 rc = MY_COLL_LEXEM_RESET;
2658 293472 goto ex;
2659
2660 267214 case '=':
2661 267214 beg++;
2662 267214 lexem->diff = 0;
2663 267214 rc = MY_COLL_LEXEM_SHIFT;
2664 267214 goto ex;
2665
2666 56538 case '/':
2667 56538 beg++;
2668 56538 rc = MY_COLL_LEXEM_EXTEND;
2669 56538 goto ex;
2670
2671 204479 case '|':
2672 204479 beg++;
2673 204479 rc = MY_COLL_LEXEM_CONTEXT;
2674 204479 goto ex;
2675
2676 795021 case '<': /* Shift: '<' or '<<' or '<<<' or '<<<<' */
2677 {
2678 /* Scan up to 3 additional '<' characters */
2679 795021 for (beg++, lexem->diff = 1;
2680
4/6
✓ Branch 0 taken 1683590 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 888569 times.
✓ Branch 3 taken 795021 times.
✓ Branch 4 taken 888569 times.
✗ Branch 5 not taken.
1683590 (beg < lexem->end) && (*beg == '<') && (lexem->diff <= 3);
2681 888569 beg++, lexem->diff++)
2682 ;
2683 795021 rc = MY_COLL_LEXEM_SHIFT;
2684 795021 goto ex;
2685 }
2686 1750293 default:
2687 1750293 break;
2688 1677601 }
2689
2690 /* Escaped character, e.g. \u1234 */
2691
4/6
✓ Branch 0 taken 1329975 times.
✓ Branch 1 taken 420318 times.
✓ Branch 2 taken 1329975 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1329975 times.
✗ Branch 5 not taken.
1750293 if ((*beg == '\\') && (beg + 2 < lexem->end) && (beg[1] == 'u') &&
2692
2/2
✓ Branch 0 taken 1329973 times.
✓ Branch 1 taken 2 times.
1329975 my_isxdigit(&my_charset_utf8_general_ci, beg[2])) {
2693 int ch;
2694
2695 1329973 beg += 2;
2696 1329973 lexem->code = 0;
2697
6/6
✓ Branch 0 taken 6647976 times.
✓ Branch 1 taken 13167 times.
✓ Branch 2 taken 5331170 times.
✓ Branch 3 taken 1316806 times.
✓ Branch 4 taken 5331170 times.
✓ Branch 5 taken 1329973 times.
6661143 while ((beg < lexem->end) && ((ch = ch2x(beg[0])) >= 0)) {
2698 5331170 lexem->code = (lexem->code << 4) + ch;
2699 5331170 beg++;
2700 }
2701 1329973 rc = MY_COLL_LEXEM_CHAR;
2702 1329973 goto ex;
2703 }
2704
2705 /*
2706 Unescaped single byte character:
2707 allow printable ASCII range except SPACE and
2708 special characters parsed above []<&/|=
2709 */
2710
3/4
✓ Branch 0 taken 336208 times.
✓ Branch 1 taken 84112 times.
✓ Branch 2 taken 336208 times.
✗ Branch 3 not taken.
420320 if (*beg >= 0x21 && *beg <= 0x7E) {
2711 336208 lexem->code = *beg++;
2712 336208 rc = MY_COLL_LEXEM_CHAR;
2713 336208 goto ex;
2714 }
2715
2716
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 if (((uchar)*beg) > 0x7F) /* Unescaped multibyte character */
2717 {
2718 84112 CHARSET_INFO *cs = &my_charset_utf8_general_ci;
2719 my_wc_t wc;
2720
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 int nbytes = cs->cset->mb_wc(cs, &wc, pointer_cast<const uchar *>(beg),
2721 84112 pointer_cast<const uchar *>(lexem->end));
2722
1/2
✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
84112 if (nbytes > 0) {
2723 84112 rc = MY_COLL_LEXEM_CHAR;
2724 84112 beg += nbytes;
2725 84112 lexem->code = (int)wc;
2726 84112 goto ex;
2727 }
2728 }
2729
2730 rc = MY_COLL_LEXEM_ERROR;
2731 goto ex;
2732 }
2733 87022 rc = MY_COLL_LEXEM_EOF;
2734
2735 3480019 ex:
2736 3480019 lexem->prev = lexem->beg;
2737 3480019 lexem->beg = beg;
2738 3480019 lexem->term = rc;
2739 3480019 return rc;
2740 }
2741
2742 /*
2743 Collation rule item
2744 */
2745
2746 #define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */
2747
2748 struct MY_COLL_RULE {
2749 my_wc_t base[MY_UCA_MAX_EXPANSION]; /* Base character */
2750 my_wc_t curr[MY_UCA_MAX_CONTRACTION]; /* Current character */
2751 int diff[4]; /* Primary, Secondary, Tertiary, Quaternary difference */
2752 size_t before_level; /* "reset before" indicator */
2753 bool with_context;
2754 };
2755
2756 /**
2757 Return length of the "reset" string of a rule.
2758
2759 @param r Collation customization rule
2760
2761 @return Length of r->base
2762 */
2763
2764 1250794 static inline size_t my_coll_rule_reset_length(MY_COLL_RULE *r) {
2765 1250794 return my_wstrnlen(r->base, MY_UCA_MAX_EXPANSION);
2766 }
2767
2768 /**
2769 Return length of the "shift" string of a rule.
2770
2771 @param r Collation customization rule
2772
2773 @return Length of r->curr
2774 */
2775
2776 1250794 static inline size_t my_coll_rule_shift_length(MY_COLL_RULE *r) {
2777 1250794 return my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
2778 }
2779
2780 /**
2781 Append new character to the end of a 0-terminated wide string.
2782
2783 @param wc Wide string
2784 @param limit Maximum possible result length
2785 @param code Character to add
2786
2787 @return 1 if character was added, 0 if string was too long
2788 */
2789
2790 1750409 static int my_coll_rule_expand(my_wc_t *wc, size_t limit, my_wc_t code) {
2791 size_t i;
2792
1/2
✓ Branch 0 taken 1957313 times.
✗ Branch 1 not taken.
1957313 for (i = 0; i < limit; i++) {
2793
2/2
✓ Branch 0 taken 1750409 times.
✓ Branch 1 taken 206904 times.
1957313 if (wc[i] == 0) {
2794 1750409 wc[i] = code;
2795 1750409 return 1;
2796 }
2797 }
2798 return 0;
2799 }
2800
2801 /**
2802 Initialize collation customization rule
2803
2804 @param r Rule
2805 */
2806
2807 293472 static void my_coll_rule_reset(MY_COLL_RULE *r) { memset(r, 0, sizeof(*r)); }
2808
2809 /*
2810 Shift methods:
2811 Simple: "&B < C" : weight('C') = weight('B') + 1
2812 Expand: weight('C') = { weight('B'), weight(last_non_ignorable) + 1 }
2813 */
2814 typedef enum {
2815 my_shift_method_simple = 0,
2816 my_shift_method_expand
2817 } my_coll_shift_method;
2818
2819 struct MY_COLL_RULES {
2820 MY_UCA_INFO *uca; /* Unicode weight data */
2821 size_t nrules; /* Number of rules in the rule array */
2822 size_t mrules; /* Number of allocated rules */
2823 MY_COLL_RULE *rule; /* Rule array */
2824 MY_CHARSET_LOADER *loader;
2825 my_coll_shift_method shift_after_method;
2826 };
2827
2828 /**
2829 Realloc rule array to a new size.
2830 Reallocate memory for 128 additional rules at once,
2831 to reduce the number of reallocs, which is important
2832 for long tailorings (e.g. for East Asian collations).
2833
2834 @param rules Rule container
2835 @param n new number of rules
2836
2837 @return 0 on success, -1 on error.
2838 */
2839
2840 1250794 static int my_coll_rules_realloc(MY_COLL_RULES *rules, size_t n) {
2841
3/4
✓ Branch 0 taken 30883 times.
✓ Branch 1 taken 1219911 times.
✓ Branch 2 taken 1250794 times.
✗ Branch 3 not taken.
1281677 if (rules->nrules < rules->mrules ||
2842
1/2
✓ Branch 0 taken 30883 times.
✗ Branch 1 not taken.
30883 (rules->rule = static_cast<MY_COLL_RULE *>(rules->loader->mem_realloc(
2843 30883 rules->rule, sizeof(MY_COLL_RULE) * (rules->mrules = n + 128)))))
2844 1250794 return 0;
2845 return -1;
2846 }
2847
2848 /**
2849 Append one new rule to a rule array
2850
2851 @param rules Rule container
2852 @param rule New rule to add
2853
2854 @return 0 on success, -1 on error.
2855 */
2856
2857 1250794 static int my_coll_rules_add(MY_COLL_RULES *rules, MY_COLL_RULE *rule) {
2858
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
1250794 if (my_coll_rules_realloc(rules, rules->nrules + 1)) return -1;
2859 1250794 rules->rule[rules->nrules++] = rule[0];
2860 1250794 return 0;
2861 }
2862
2863 /**
2864 Apply difference at level
2865
2866 @param r Rule
2867 @param level Level (0,1,2,3,4)
2868 */
2869
2870 1062235 static void my_coll_rule_shift_at_level(MY_COLL_RULE *r, int level) {
2871
5/6
✓ Branch 0 taken 37548 times.
✓ Branch 1 taken 341219 times.
✓ Branch 2 taken 93487 times.
✓ Branch 3 taken 322767 times.
✓ Branch 4 taken 267214 times.
✗ Branch 5 not taken.
1062235 switch (level) {
2872 37548 case 4: /* Quaternary difference */
2873 37548 r->diff[3]++;
2874 37548 break;
2875 341219 case 3: /* Tertiary difference */
2876 341219 r->diff[2]++;
2877 341219 r->diff[3] = 0;
2878 341219 break;
2879 93487 case 2: /* Secondary difference */
2880 93487 r->diff[1]++;
2881 93487 r->diff[2] = r->diff[3] = 0;
2882 93487 break;
2883 322767 case 1: /* Primary difference */
2884 322767 r->diff[0]++;
2885 322767 r->diff[1] = r->diff[2] = r->diff[3] = 0;
2886 322767 break;
2887 267214 case 0:
2888 /* Do nothing for '=': use the previous offsets for all levels */
2889 267214 break;
2890 default:
2891 assert(0);
2892 }
2893 1062235 }
2894
2895 struct MY_COLL_RULE_PARSER {
2896 MY_COLL_LEXEM tok[2]; /* Current token and next token for look-ahead */
2897 MY_COLL_RULE rule; /* Currently parsed rule */
2898 MY_COLL_RULES *rules; /* Rule list pointer */
2899 char errstr[128]; /* Error message */
2900 };
2901
2902 /**
2903 Current parser token
2904
2905 @param p Collation customization parser
2906
2907 @return Pointer to the current token
2908 */
2909
2910 16265125 static MY_COLL_LEXEM *my_coll_parser_curr(MY_COLL_RULE_PARSER *p) {
2911 16265125 return &p->tok[0];
2912 }
2913
2914 /**
2915 Next parser token, to look ahead.
2916
2917 @param p Collation customization parser
2918
2919 @return Pointer to the next token
2920 */
2921
2922 6902020 static MY_COLL_LEXEM *my_coll_parser_next(MY_COLL_RULE_PARSER *p) {
2923 6902020 return &p->tok[1];
2924 }
2925
2926 /**
2927 Scan one token from the input stream
2928
2929 @param p Collation customization parser
2930
2931 @return 1, for convenience, to use in logical expressions easier.
2932 */
2933 3422001 static int my_coll_parser_scan(MY_COLL_RULE_PARSER *p) {
2934 3422001 my_coll_parser_curr(p)[0] = my_coll_parser_next(p)[0];
2935 3422001 my_coll_lexem_next(my_coll_parser_next(p));
2936 3422001 return 1;
2937 }
2938
2939 /**
2940 Initialize collation customization parser
2941
2942 @param p Collation customization parser
2943 @param rules Where to store rules
2944 @param str Beginning of a collation customization string
2945 @param str_end End of the collation customizations string
2946 */
2947
2948 29009 static void my_coll_parser_init(MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules,
2949 const char *str, const char *str_end) {
2950 /*
2951 Initialize parser to the input buffer and scan two tokens,
2952 to make the current token and the next token known.
2953 */
2954 29009 memset(p, 0, sizeof(*p));
2955 29009 p->rules = rules;
2956 29009 p->errstr[0] = '\0';
2957 29009 my_coll_lexem_init(my_coll_parser_curr(p), str, str_end);
2958 29009 my_coll_lexem_next(my_coll_parser_curr(p));
2959 29009 my_coll_parser_next(p)[0] = my_coll_parser_curr(p)[0];
2960 29009 my_coll_lexem_next(my_coll_parser_next(p));
2961 29009 }
2962
2963 /**
2964 Display error when an unexpected token found
2965
2966 @param p Collation customization parser
2967 @param term Which lexem was expected
2968
2969 @return 0, to use in "return" and boolean expressions.
2970 */
2971
2972 1 static int my_coll_parser_expected_error(MY_COLL_RULE_PARSER *p,
2973 my_coll_lexem_num term) {
2974 1 snprintf(p->errstr, sizeof(p->errstr), "%s expected",
2975 my_coll_lexem_num_to_str(term));
2976 1 return 0;
2977 }
2978
2979 /**
2980 Display error when a too long character sequence is met
2981
2982 @param p Collation customization parser
2983 @param name Which kind of sequence: contraction, expansion, etc.
2984
2985 @return 0, to use in "return" and boolean expressions.
2986 */
2987
2988 static int my_coll_parser_too_long_error(MY_COLL_RULE_PARSER *p,
2989 const char *name) {
2990 snprintf(p->errstr, sizeof(p->errstr), "%s is too long", name);
2991 return 0;
2992 }
2993
2994 /**
2995 Scan the given lexem from input stream, or display "expected" error.
2996
2997 @param p Collation customization parser
2998 @param term Which lexem is expected.
2999
3000 @retval 0 if the required term was not found.
3001 @retval 1 if the required term was found.
3002 */
3003 1939132 static int my_coll_parser_scan_term(MY_COLL_RULE_PARSER *p,
3004 my_coll_lexem_num term) {
3005
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1939132 times.
1939132 if (my_coll_parser_curr(p)->term != term)
3006 return my_coll_parser_expected_error(p, term);
3007 1939132 return my_coll_parser_scan(p);
3008 }
3009
3010 /*
3011 In the following code we have a few functions to parse
3012 various collation customization non-terminal symbols.
3013 Unlike our usual coding convention, they return
3014 - 0 on "error" (when the rule was not scanned) and
3015 - 1 on "success"(when the rule was scanned).
3016 This is done intentionally to make body of the functions look easier
3017 and repeat the grammar of the rules in straightforward manner.
3018 For example:
3019
3020 // <x> ::= <y> | <z>
3021 int parse_x() { return parse_y() || parse_z(); }
3022
3023 // <x> ::= <y> <z>
3024 int parse_x() { return parse_y() && parse_z(); }
3025
3026 Using 1 on "not found" and 0 on "found" in the parser code would
3027 make the code more error prone and harder to read because
3028 of having to use inverse boolean logic.
3029 */
3030
3031 /**
3032 Scan a collation setting in brackets, for example UCA version.
3033
3034 @param p Collation customization parser
3035
3036 @retval 0 if setting was not scanned.
3037 @retval 1 if setting was scanned.
3038 */
3039
3040 11 static int my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p) {
3041 11 MY_COLL_RULES *rules = p->rules;
3042 11 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3043
3044
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 6 times.
11 if (!lex_cmp(lexem, STRING_WITH_LEN("[version 4.0.0]"))) {
3045 5 rules->uca = &my_uca_v400;
3046
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
6 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[version 5.2.0]"))) {
3047 2 rules->uca = &my_uca_v520;
3048
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method expand]"))) {
3049 2 rules->shift_after_method = my_shift_method_expand;
3050
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method simple]"))) {
3051 rules->shift_after_method = my_shift_method_simple;
3052 } else {
3053 2 return 0;
3054 }
3055 9 return my_coll_parser_scan(p);
3056 }
3057
3058 /**
3059 Scan multiple collation settings
3060
3061 @param p Collation customization parser
3062
3063 @retval 0 if no settings were scanned.
3064 @retval 1 if one or more settings were scanned.
3065 */
3066
3067 29009 static int my_coll_parser_scan_settings(MY_COLL_RULE_PARSER *p) {
3068 /* Scan collation setting or special purpose command */
3069
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 29007 times.
29018 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) {
3070
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
11 if (!my_coll_parser_scan_setting(p)) return 0;
3071 }
3072 29007 return 1;
3073 }
3074
3075 /**
3076 Scan [before xxx] reset option
3077
3078 @param p Collation customization parser
3079
3080 @retval 0 if reset option was not scanned.
3081 @retval 1 if reset option was scanned.
3082 */
3083
3084 25951 static int my_coll_parser_scan_reset_before(MY_COLL_RULE_PARSER *p) {
3085 25951 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3086
6/6
✓ Branch 0 taken 25907 times.
✓ Branch 1 taken 44 times.
✓ Branch 2 taken 6958 times.
✓ Branch 3 taken 18949 times.
✓ Branch 4 taken 7002 times.
✓ Branch 5 taken 18949 times.
51858 if (!lex_cmp(lexem, STRING_WITH_LEN("[before primary]")) ||
3087 25907 !lex_cmp(lexem, STRING_WITH_LEN("[before 1]"))) {
3088 7002 p->rule.before_level = 1;
3089
6/6
✓ Branch 0 taken 18945 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1120 times.
✓ Branch 3 taken 17825 times.
✓ Branch 4 taken 1124 times.
✓ Branch 5 taken 17825 times.
37894 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before secondary]")) ||
3090 18945 !lex_cmp(lexem, STRING_WITH_LEN("[before 2]"))) {
3091 1124 p->rule.before_level = 2;
3092
6/6
✓ Branch 0 taken 17821 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 17765 times.
✓ Branch 3 taken 56 times.
✓ Branch 4 taken 17769 times.
✓ Branch 5 taken 56 times.
35646 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before tertiary]")) ||
3093 17821 !lex_cmp(lexem, STRING_WITH_LEN("[before 3]"))) {
3094 17769 p->rule.before_level = 3;
3095
5/6
✓ Branch 0 taken 52 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 52 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 52 times.
108 } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before quaternary]")) ||
3096 52 !lex_cmp(lexem, STRING_WITH_LEN("[before 4]"))) {
3097 4 p->rule.before_level = 4;
3098 } else {
3099 52 p->rule.before_level = 0;
3100 52 return 0; /* Don't scan the next character */
3101 }
3102 25899 return my_coll_parser_scan(p);
3103 }
3104
3105 /**
3106 Scan logical position and add to the wide string.
3107
3108 @param p Collation customization parser
3109 @param pwc Wide string to add code to
3110 @param limit The result string cannot be longer than 'limit' characters
3111
3112 @retval 0 if logical position was not scanned.
3113 @retval 1 if logical position was scanned.
3114 */
3115
3116 70 static int my_coll_parser_scan_logical_position(MY_COLL_RULE_PARSER *p,
3117 my_wc_t *pwc, size_t limit) {
3118 70 MY_COLL_RULES *rules = p->rules;
3119 70 MY_COLL_LEXEM *lexem = my_coll_parser_curr(p);
3120
3121
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 62 times.
70 if (!lex_cmp(lexem, STRING_WITH_LEN("[first non-ignorable]")))
3122 8 lexem->code = rules->uca->first_non_ignorable;
3123
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 50 times.
62 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last non-ignorable]")))
3124 12 lexem->code = rules->uca->last_non_ignorable;
3125
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 46 times.
50 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first primary ignorable]")))
3126 4 lexem->code = rules->uca->first_primary_ignorable;
3127
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 42 times.
46 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last primary ignorable]")))
3128 4 lexem->code = rules->uca->last_primary_ignorable;
3129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
42 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first secondary ignorable]")))
3130 6 lexem->code = rules->uca->first_secondary_ignorable;
3131
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 32 times.
36 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last secondary ignorable]")))
3132 4 lexem->code = rules->uca->last_secondary_ignorable;
3133
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 28 times.
32 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first tertiary ignorable]")))
3134 4 lexem->code = rules->uca->first_tertiary_ignorable;
3135
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
28 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last tertiary ignorable]")))
3136 4 lexem->code = rules->uca->last_tertiary_ignorable;
3137
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 20 times.
24 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first trailing]")))
3138 4 lexem->code = rules->uca->first_trailing;
3139
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
20 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last trailing]")))
3140 4 lexem->code = rules->uca->last_trailing;
3141
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 else if (!lex_cmp(lexem, STRING_WITH_LEN("[first variable]")))
3142 8 lexem->code = rules->uca->first_variable;
3143
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 else if (!lex_cmp(lexem, STRING_WITH_LEN("[last variable]")))
3144 8 lexem->code = rules->uca->last_variable;
3145 else
3146 return 0; /* Don't scan the next token */
3147
3148
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
70 if (!my_coll_rule_expand(pwc, limit, lexem->code)) {
3149 /*
3150 Logical position can not be in a contraction,
3151 so the above call should never fail.
3152 Let's assert in debug version and print
3153 a nice error message in production version.
3154 */
3155 assert(0);
3156 return my_coll_parser_too_long_error(p, "Logical position");
3157 }
3158 70 return my_coll_parser_scan(p);
3159 }
3160
3161 /**
3162 Scan character list
3163
3164 @<character list@> ::= CHAR [ CHAR... ]
3165
3166 @param p Collation customization parser
3167 @param pwc Character string to add code to
3168 @param limit The result string cannot be longer than 'limit' characters
3169 @param name E.g. "contraction", "expansion"
3170
3171 @retval 0 if character sequence was not scanned.
3172 @retval 1 if character sequence was scanned.
3173 */
3174
3175 1616654 static int my_coll_parser_scan_character_list(MY_COLL_RULE_PARSER *p,
3176 my_wc_t *pwc, size_t limit,
3177 const char *name) {
3178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
1616654 if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_CHAR)
3179 return my_coll_parser_expected_error(p, MY_COLL_LEXEM_CHAR);
3180
3181
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
1616654 if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code))
3182 return my_coll_parser_too_long_error(p, name);
3183
3184
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
1616654 if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_CHAR)) return 0;
3185
3186
2/2
✓ Branch 0 taken 133639 times.
✓ Branch 1 taken 1616654 times.
1750293 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CHAR) {
3187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 133639 times.
133639 if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code))
3188 return my_coll_parser_too_long_error(p, name);
3189 133639 my_coll_parser_scan(p);
3190 }
3191 1616654 return 1;
3192 }
3193
3194 /**
3195 Scan reset sequence
3196
3197 @<reset sequence@> ::=
3198 [ @<reset before option@> ] @<character list@>
3199 | [ @<reset before option@> ] @<logical reset position@>
3200
3201 @param p Collation customization parser
3202
3203 @retval 0 if reset sequence was not scanned.
3204 @retval 1 if reset sequence was scanned.
3205 */
3206
3207 293472 static int my_coll_parser_scan_reset_sequence(MY_COLL_RULE_PARSER *p) {
3208 293472 my_coll_rule_reset(&p->rule);
3209
3210 /* Scan "[before x]" option, if exists */
3211
2/2
✓ Branch 0 taken 25951 times.
✓ Branch 1 taken 267521 times.
293472 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION)
3212 25951 my_coll_parser_scan_reset_before(p);
3213
3214 /* Try logical reset position */
3215
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 293402 times.
293472 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) {
3216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
70 if (!my_coll_parser_scan_logical_position(p, p->rule.base, 1)) return 0;
3217 } else {
3218 /* Scan single reset character or expansion */
3219
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 293402 times.
293402 if (!my_coll_parser_scan_character_list(p, p->rule.base,
3220 MY_UCA_MAX_EXPANSION, "Expansion"))
3221 return 0;
3222 }
3223
3224
2/2
✓ Branch 0 taken 293468 times.
✓ Branch 1 taken 4 times.
293472 if ((p->rules->shift_after_method == my_shift_method_expand ||
3225
2/2
✓ Branch 0 taken 7000 times.
✓ Branch 1 taken 286468 times.
293468 p->rule.before_level == 1) &&
3226
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 6958 times.
7004 p->rules->uca->version < UCA_V900) /* Apply "before primary" option */
3227 {
3228 /*
3229 Suppose we have this rule: &B[before primary] < C
3230 i.e. we need to put C before B, but after A, so
3231 the result order is: A < C < B.
3232
3233 Let primary weight of B be [BBBB].
3234
3235 We cannot just use [BBBB-1] as weight for C:
3236 DUCET does not have enough unused weights between any two characters,
3237 so using [BBBB-1] will likely make C equal to the previous character,
3238 which is A, so we'll get this order instead of the desired: A = C < B.
3239
3240 To guarantee that that C is sorted after A, we'll use expansion
3241 with a kind of "biggest possible character".
3242 As "biggest possible character" we'll use "last_non_ignorable":
3243
3244 We'll compose weight for C as: [BBBB-1][MMMM+1]
3245 where [MMMM] is weight for "last_non_ignorable".
3246
3247 We also do the same trick for "reset after" if the collation
3248 option says so. E.g. for the rules "&B < C", weight for
3249 C will be calculated as: [BBBB][MMMM+1]
3250
3251 At this point we only need to store codepoints
3252 'B' and 'last_non_ignorable'. Actual weights for 'C'
3253 will be calculated according to the above formula later,
3254 in create_tailoring().
3255 */
3256
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
46 if (!my_coll_rule_expand(p->rule.base, MY_UCA_MAX_EXPANSION,
3257 46 p->rules->uca->last_non_ignorable))
3258 return my_coll_parser_too_long_error(p, "Expansion");
3259 }
3260 293472 return 1;
3261 }
3262
3263 /**
3264 Scan shift sequence
3265
3266 @<shift sequence@> ::=
3267 @<character list@> [ / @<character list@> ]
3268 | @<character list@> [ | @<character list@> ]
3269
3270 @param p Collation customization parser
3271
3272 @retval 0 if shift sequence was not scanned.
3273 @retval 1 if shift sequence was scanned.
3274 */
3275
3276 1062235 static int my_coll_parser_scan_shift_sequence(MY_COLL_RULE_PARSER *p) {
3277 MY_COLL_RULE before_extend;
3278
3279 1062235 memset(&p->rule.curr, 0, sizeof(p->rule.curr));
3280
3281 /* Scan single shift character or contraction */
3282
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1062235 times.
1062235 if (!my_coll_parser_scan_character_list(
3283
1/2
✓ Branch 0 taken 1062235 times.
✗ Branch 1 not taken.
1062235 p, p->rule.curr, MY_UCA_MAX_CONTRACTION, "Contraction"))
3284 return 0;
3285
3286 1062235 before_extend = p->rule; /* Remember the part before "/" */
3287
3288 /* Append the part after "/" as expansion */
3289
2/2
✓ Branch 0 taken 11641 times.
✓ Branch 1 taken 1050594 times.
1062235 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
3290
1/2
✓ Branch 0 taken 11641 times.
✗ Branch 1 not taken.
11641 my_coll_parser_scan(p);
3291
2/4
✓ Branch 0 taken 11641 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 11641 times.
11641 if (!my_coll_parser_scan_character_list(p, p->rule.base,
3292 MY_UCA_MAX_EXPANSION, "Expansion"))
3293 return 0;
3294
2/2
✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 846115 times.
1050594 } else if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CONTEXT) {
3295 /*
3296 We support 2-character long context sequences only:
3297 one character is the previous context, plus the current character.
3298 It's OK as Unicode's CLDR does not have longer examples.
3299 */
3300
1/2
✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
204479 my_coll_parser_scan(p);
3301 204479 p->rule.with_context = true;
3302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204479 times.
204479 if (!my_coll_parser_scan_character_list(
3303
1/2
✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
204479 p, p->rule.curr + 1, MY_UCA_MAX_EXPANSION - 1, "context"))
3304 return 0;
3305 /*
3306 It might be CONTEXT followed by EXPANSION. For example, Japanese
3307 collation has one rule defined as:
3308 "&[before 3]へ<<<へ|ゝ=べ|ゝ=へ|ゞ/\u3099"
3309 The part of "へ|ゞ/\u3099" is CONTEXT ('|') followed by EXPANSION ('/').
3310 */
3311
2/2
✓ Branch 0 taken 44897 times.
✓ Branch 1 taken 159582 times.
204479 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) {
3312
1/2
✓ Branch 0 taken 44897 times.
✗ Branch 1 not taken.
44897 my_coll_parser_scan(p);
3313 44897 size_t len = my_wstrnlen(p->rule.base, MY_UCA_MAX_EXPANSION);
3314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44897 times.
44897 if (!my_coll_parser_scan_character_list(
3315
1/2
✓ Branch 0 taken 44897 times.
✗ Branch 1 not taken.
44897 p, p->rule.base + len, MY_UCA_MAX_EXPANSION - len, "Expansion"))
3316 return 0;
3317 }
3318 }
3319
3320 /* Add rule to the rule list */
3321
2/4
✓ Branch 0 taken 1062235 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1062235 times.
1062235 if (my_coll_rules_add(p->rules, &p->rule)) return 0;
3322
3323 1062235 p->rule = before_extend; /* Restore to the state before "/" */
3324
3325 1062235 return 1;
3326 }
3327
3328 /**
3329 Scan shift operator
3330
3331 @<shift@> ::= < | << | <<< | <<<< | =
3332
3333 @param p Collation customization parser
3334
3335 @retval 0 if shift operator was not scanned.
3336 @retval 1 if shift operator was scanned.
3337 */
3338 1355707 static int my_coll_parser_scan_shift(MY_COLL_RULE_PARSER *p) {
3339
2/2
✓ Branch 0 taken 1062235 times.
✓ Branch 1 taken 293472 times.
1355707 if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_SHIFT) {
3340 1062235 my_coll_rule_shift_at_level(&p->rule, my_coll_parser_curr(p)->diff);
3341 1062235 return my_coll_parser_scan(p);
3342 }
3343 293472 return 0;
3344 }
3345
3346 /**
3347 Scan one rule: reset followed by a number of shifts
3348
3349 @<rule@> ::=
3350 & @<reset sequence@>
3351 @<shift@> @<shift sequence@>
3352 [ { @<shift@> @<shift sequence@> }... ]
3353
3354 @param p Collation customization parser
3355
3356 @retval 0 if rule was not scanned.
3357 @retval 1 if rule was scanned.
3358 */
3359 293472 static int my_coll_parser_scan_rule(MY_COLL_RULE_PARSER *p) {
3360
2/4
✓ Branch 0 taken 293472 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 293472 times.
586944 if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_RESET) ||
3361
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 293472 times.
293472 !my_coll_parser_scan_reset_sequence(p))
3362 return 0;
3363
3364 /* Scan the first required shift command */
3365
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 293471 times.
293472 if (!my_coll_parser_scan_shift(p))
3366 1 return my_coll_parser_expected_error(p, MY_COLL_LEXEM_SHIFT);
3367
3368 /* Scan the first shift sequence */
3369
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 293471 times.
293471 if (!my_coll_parser_scan_shift_sequence(p)) return 0;
3370
3371 /* Scan subsequent shift rules */
3372
2/2
✓ Branch 0 taken 768764 times.
✓ Branch 1 taken 293471 times.
1062235 while (my_coll_parser_scan_shift(p)) {
3373
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 768764 times.
768764 if (!my_coll_parser_scan_shift_sequence(p)) return 0;
3374 }
3375 293471 return 1;
3376 }
3377
3378 /**
3379 Scan collation customization: settings followed by rules
3380
3381 @<collation customization@> ::=
3382 [ @<setting@> ... ]
3383 [ @<rule@>... ]
3384
3385 @param p Collation customization parser
3386
3387 @retval 0 if collation customization expression was not scanned.
3388 @retval 1 if collation customization expression was scanned.
3389 */
3390
3391 29009 static int my_coll_parser_exec(MY_COLL_RULE_PARSER *p) {
3392
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 29007 times.
29009 if (!my_coll_parser_scan_settings(p)) return 0;
3393
3394
2/2
✓ Branch 0 taken 293472 times.
✓ Branch 1 taken 29006 times.
322478 while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_RESET) {
3395
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 293471 times.
293472 if (!my_coll_parser_scan_rule(p)) return 0;
3396 }
3397 /* Make sure no unparsed input data left */
3398 29006 return my_coll_parser_scan_term(p, MY_COLL_LEXEM_EOF);
3399 }
3400
3401 /*
3402 Collation language syntax parser.
3403 Uses lexical parser.
3404
3405 @param rules Collation rule list to load to.
3406 @param str A string with collation customization.
3407 @param str_end End of the string.
3408 @param col_name Collation name
3409
3410 @retval 0 on success
3411 @retval 1 on error
3412 */
3413
3414 29009 static int my_coll_rule_parse(MY_COLL_RULES *rules, const char *str,
3415 const char *str_end, const char *col_name) {
3416 MY_COLL_RULE_PARSER p;
3417
3418
1/2
✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
29009 my_coll_parser_init(&p, rules, str, str_end);
3419
3420
3/4
✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 29006 times.
29009 if (!my_coll_parser_exec(&p)) {
3421 3 rules->loader->errcode = EE_COLLATION_PARSER_ERROR;
3422
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 my_coll_lexem_print_error(my_coll_parser_curr(&p), rules->loader->errarg,
3423 sizeof(rules->loader->errarg) - 1, p.errstr,
3424 col_name);
3425 3 return 1;
3426 }
3427 29006 return 0;
3428 }
3429
3430 7536 static void spread_case_mask(uint16 *to, size_t to_stride,
3431 size_t tailored_ce_cnt, uint16 case_mask) {
3432
2/2
✓ Branch 0 taken 18526 times.
✓ Branch 1 taken 7536 times.
26062 for (size_t i = 0; i < tailored_ce_cnt; ++i) {
3433 18526 uint16 *case_weight = &to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride];
3434
2/2
✓ Branch 0 taken 8164 times.
✓ Branch 1 taken 10362 times.
18526 if (*case_weight > CASE_FIRST_UPPER_MASK)
3435 8164 case_mask = *case_weight & 0xFF00;
3436
2/2
✓ Branch 0 taken 6751 times.
✓ Branch 1 taken 3611 times.
10362 else if (*case_weight)
3437 6751 *case_weight |= case_mask;
3438 }
3439 7536 }
3440
3441 /*
3442 If the collation is marked as [caseFirst upper], move all of the weights
3443 around to accommodate that. Only tailored weights are changed; for non-tailored
3444 weights, we do it on-the-fly in uca_scanner_900::apply_case_first().
3445
3446 [caseFirst upper] is a directive that says that case should override all
3447 other tertiary case concerns (in a sense, a “level 2.5”), and furthermore,
3448 that uppercase should come before lowercase. (Normally lowercase sorts
3449 before uppercase.) It is currently only used in the Danish collation.
3450
3451 This is done by looking at the tertiary weight, inferring the case from it,
3452 and then using the upper bits (which are normally unused) to signal the case.
3453 The algorithm is detailed in Unicode TR35, section 3.14, although we don't
3454 seem to follow it exactly.
3455 */
3456 1250794 static void change_weight_if_case_first(CHARSET_INFO *cs,
3457 const MY_UCA_INFO *dst, MY_COLL_RULE *r,
3458 uint16 *to, size_t to_stride,
3459 size_t curr_len,
3460 size_t tailored_ce_cnt) {
3461 /* We only need to implement [caseFirst upper] right now. */
3462
4/4
✓ Branch 0 taken 518711 times.
✓ Branch 1 taken 732083 times.
✓ Branch 2 taken 7536 times.
✓ Branch 3 taken 511175 times.
1250794 if (!(cs->coll_param && cs->coll_param->case_first == CASE_FIRST_UPPER &&
3463
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7536 times.
7536 cs->levels_for_compare == 3))
3464 1243258 return;
3465
3466
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7536 times.
7536 assert(cs->uca->version == UCA_V900);
3467
3468 // How many CEs this character has with non-ignorable primary weight.
3469 7536 int tailored_pri_cnt = 0;
3470 7536 int origin_pri_cnt = 0;
3471
2/2
✓ Branch 0 taken 12089 times.
✓ Branch 1 taken 4239 times.
16328 for (size_t i = 0; i < tailored_ce_cnt; ++i) {
3472 /*
3473 If rule A has already applied a case weight change, and we have rule B
3474 which is inherited from A, apply the same case weight change on the rest
3475 of rule B and return.
3476 */
3477
2/2
✓ Branch 0 taken 3297 times.
✓ Branch 1 taken 8792 times.
12089 if (to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] > CASE_FIRST_UPPER_MASK) {
3478 3297 spread_case_mask(to, to_stride, tailored_ce_cnt, /*case_mask=*/0);
3479 3297 return;
3480 }
3481
2/2
✓ Branch 0 taken 7222 times.
✓ Branch 1 taken 1570 times.
8792 if (to[i * MY_UCA_900_CE_SIZE * to_stride]) tailored_pri_cnt++;
3482 }
3483
3/4
✓ Branch 0 taken 1570 times.
✓ Branch 1 taken 2669 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1570 times.
4239 if (r->before_level == 1 || r->diff[0]) tailored_pri_cnt--;
3484
3485 // Use the DUCET weight to detect the character's case.
3486 4239 MY_UCA_INFO *src = &my_uca_v900;
3487 4239 int changed_ce = 0;
3488
3489 4239 my_wc_t *curr = r->curr;
3490
2/2
✓ Branch 0 taken 4710 times.
✓ Branch 1 taken 4239 times.
8949 for (size_t i = 0; i < curr_len; ++i) {
3491 4710 const uint16 *from = my_char_weight_addr_900(src, *curr);
3492 4710 uint page = *curr >> 8;
3493 4710 uint code = *curr & 0xFF;
3494 4710 curr++;
3495 4710 int ce_cnt =
3496
1/2
✓ Branch 0 taken 4710 times.
✗ Branch 1 not taken.
4710 src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
3497
2/2
✓ Branch 0 taken 8792 times.
✓ Branch 1 taken 4710 times.
13502 for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
3498
2/2
✓ Branch 0 taken 5338 times.
✓ Branch 1 taken 3454 times.
8792 if (from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS]) origin_pri_cnt++;
3499 }
3500 }
3501 4239 int case_to_copy = 0;
3502
2/2
✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 1099 times.
4239 if (origin_pri_cnt <= tailored_pri_cnt)
3503 3140 case_to_copy = origin_pri_cnt;
3504 else
3505 1099 case_to_copy = tailored_pri_cnt - 1;
3506 4239 int upper_cnt = 0;
3507 4239 int lower_cnt = 0;
3508 4239 curr = r->curr;
3509 4239 uint16 case_mask = 0;
3510
2/2
✓ Branch 0 taken 4710 times.
✓ Branch 1 taken 4239 times.
8949 for (size_t curr_ind = 0; curr_ind < curr_len; ++curr_ind) {
3511 4710 const uint16 *from = my_char_weight_addr_900(src, *curr);
3512 4710 uint page = *curr >> 8;
3513 4710 uint code = *curr & 0xFF;
3514 4710 curr++;
3515 4710 int ce_cnt =
3516
1/2
✓ Branch 0 taken 4710 times.
✗ Branch 1 not taken.
4710 src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0;
3517 4710 changed_ce = 0;
3518
2/2
✓ Branch 0 taken 8792 times.
✓ Branch 1 taken 4710 times.
13502 for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) {
3519 8792 uint16 primary_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS];
3520
2/2
✓ Branch 0 taken 5338 times.
✓ Branch 1 taken 3454 times.
8792 if (primary_weight) {
3521 5338 uint16 case_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS +
3522 5338 2 * UCA900_DISTANCE_BETWEEN_LEVELS];
3523 5338 uint16 *ce_to = nullptr;
3524
2/2
✓ Branch 0 taken 2669 times.
✓ Branch 1 taken 2669 times.
5338 if (is_tertiary_weight_upper_case(case_weight)) {
3525
2/2
✓ Branch 0 taken 1099 times.
✓ Branch 1 taken 1570 times.
2669 if (!case_to_copy)
3526 1099 upper_cnt++;
3527 else
3528 1570 case_mask = CASE_FIRST_UPPER_MASK;
3529 } else {
3530
2/2
✓ Branch 0 taken 1099 times.
✓ Branch 1 taken 1570 times.
2669 if (!case_to_copy)
3531 1099 lower_cnt++;
3532 else
3533 1570 case_mask = CASE_FIRST_LOWER_MASK;
3534 }
3535
2/2
✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 2198 times.
5338 if (case_to_copy) {
3536 do {
3537 3140 ce_to = to + changed_ce * MY_UCA_900_CE_SIZE * to_stride;
3538 3140 changed_ce++;
3539
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3140 times.
3140 } while (*ce_to == 0);
3540 3140 ce_to[2 * to_stride] |= case_mask;
3541 3140 case_to_copy--;
3542 }
3543 }
3544 }
3545 }
3546
2/2
✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 1099 times.
4239 if (origin_pri_cnt <= tailored_pri_cnt) {
3547
2/2
✓ Branch 0 taken 314 times.
✓ Branch 1 taken 3140 times.
3454 for (int i = origin_pri_cnt; i < tailored_pri_cnt; ++i) {
3548 314 const int offset = changed_ce * MY_UCA_900_CE_SIZE * to_stride;
3549
2/4
✓ Branch 0 taken 314 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 314 times.
✗ Branch 3 not taken.
314 if (to[offset] && to[offset] < dst->extra_ce_pri_base)
3550 314 to[offset + 2 * to_stride] = 0;
3551 }
3552 } else {
3553
4/4
✓ Branch 0 taken 628 times.
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 157 times.
✓ Branch 3 taken 471 times.
1099 if (upper_cnt && lower_cnt)
3554 157 case_mask = CASE_FIRST_MIXED_MASK;
3555
3/4
✓ Branch 0 taken 471 times.
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 471 times.
✗ Branch 3 not taken.
942 else if (upper_cnt && !lower_cnt)
3556 471 case_mask = CASE_FIRST_UPPER_MASK;
3557 else
3558 471 case_mask = CASE_FIRST_LOWER_MASK;
3559 1099 bool skipped_extra_ce = false;
3560
2/2
✓ Branch 0 taken 2198 times.
✓ Branch 1 taken 1099 times.
3297 for (int i = tailored_ce_cnt - 1; i >= 0; --i) {
3561 2198 int offset = i * MY_UCA_900_CE_SIZE * to_stride;
3562
3/4
✓ Branch 0 taken 2198 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1099 times.
✓ Branch 3 taken 1099 times.
2198 if (to[offset] && to[offset] < dst->extra_ce_pri_base) {
3563
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1099 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1099 times.
✗ Branch 5 not taken.
1099 if ((r->before_level == 1 || r->diff[0]) && !skipped_extra_ce) {
3564 1099 skipped_extra_ce = true;
3565 1099 continue;
3566 }
3567 to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] |= case_mask;
3568 break;
3569 }
3570 }
3571 }
3572 4239 spread_case_mask(to, to_stride, tailored_ce_cnt, case_mask);
3573 }
3574
3575 734767 static size_t my_char_weight_put_900(MY_UCA_INFO *dst, uint16 *to,
3576 size_t to_stride, size_t to_length,
3577 uint16 *to_num_ce,
3578 const MY_COLL_RULE *rule,
3579 size_t base_len) {
3580 size_t count;
3581 734767 int total_ce_cnt = 0;
3582
3583 734767 const my_wc_t *base = rule->base;
3584
2/2
✓ Branch 0 taken 1034642 times.
✓ Branch 1 taken 734767 times.
1769409 for (count = 0; base_len;) {
3585 1034642 const uint16 *from = nullptr;
3586 1034642 size_t from_stride = 0;
3587 1034642 int ce_cnt = 0;
3588
3589
2/2
✓ Branch 0 taken 366380 times.
✓ Branch 1 taken 1015819 times.
1382199 for (size_t chlen = base_len; chlen > 1; chlen--) {
3590
3/4
✓ Branch 0 taken 366380 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 18823 times.
✓ Branch 3 taken 347557 times.
366380 if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
3591 chlen))) {
3592 18823 from_stride = 1;
3593 18823 base += chlen;
3594 18823 base_len -= chlen;
3595 18823 ce_cnt = *(from + MY_UCA_MAX_WEIGHT_SIZE - 1);
3596 18823 break;
3597 }
3598 }
3599
3600
2/2
✓ Branch 0 taken 1015819 times.
✓ Branch 1 taken 18823 times.
1034642 if (!from) {
3601 1015819 uint page = *base >> 8;
3602 1015819 uint code = *base & 0xFF;
3603 1015819 base++;
3604 1015819 base_len--;
3605
1/2
✓ Branch 0 taken 1015819 times.
✗ Branch 1 not taken.
1015819 if (dst->weights[page]) {
3606 1015819 from = UCA900_WEIGHT_ADDR(dst->weights[page], /*level=*/0, code);
3607 1015819 from_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
3608 1015819 ce_cnt = UCA900_NUM_OF_CE(dst->weights[page], code);
3609 }
3610 }
3611
3612 1034642 for (int weight_ind = 0;
3613
3/4
✓ Branch 0 taken 3741231 times.
✓ Branch 1 taken 1034642 times.
✓ Branch 2 taken 3741231 times.
✗ Branch 3 not taken.
4775873 weight_ind < ce_cnt * MY_UCA_900_CE_SIZE && count < to_length;
3614 weight_ind++) {
3615 3741231 *to = *from;
3616 3741231 to += to_stride;
3617 3741231 from += from_stride;
3618 3741231 count++;
3619 }
3620 1034642 total_ce_cnt += ce_cnt;
3621 }
3622
3623 /*
3624 For shift on primary weight, there might be no enough room in the tables.
3625 For example, Sihala has the rule "&\\u0DA5 < \\u0DA4", which means
3626 that we should move U+0DA4 after U+0DA5 (on the primary level).
3627 However, there is no room after U+0DA5 in DUCET unless we wanted to
3628 conflict with U+0DA6:
3629
3630 0DA4 ; [.28EC.0020.0002] # SINHALA LETTER TAALUJA NAASIKYAYA
3631 0DA5 ; [.28ED.0020.0002] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA
3632 0DA6 ; [.28EE.0020.0002] # SINHALA LETTER SANYAKA JAYANNA
3633
3634 Before our implementation of UCA 9.0.0, the shift on primary weight was
3635 done by making it a fake expansion when parsing the rule, where we'd expand
3636 U+0DA4 to U+0DA5 U+MMMM, MMMM being 'last_non_ignorable'. (This happens
3637 in my_coll_parser_scan_reset_sequence()). But from UCA 9.0.0, we also
3638 support accent- and case-sensitive collations, and then, having the extra
3639 weights of 'last_non_ignorable' (which is just a random character) on the
3640 second and third level may cause unexpected results for algorithms that
3641 use the meaning of the tertiary weight to infer case. Thus, we'll abandon
3642 the fake expansion way; instead, instead add an extra CE (after the one
3643 from U+0DA5, the character we are moving after) to represent all the
3644 weights we might want to shift. The actual shifting happens in
3645 apply_shift_900().
3646
3647 For the rule "&\\u0DA5 < \\u0DA4", U+0DA4's weights become
3648 [.28ED.0020.0002][.54A4.0000.0000], where 0x54A4 is the value of
3649 extra_ce_pri_base. We then apply the differences from the rule
3650 (which are never negative) to the last CE, so that it becomes
3651 e.g. [.54A5.0000.0000].
3652 */
3653
7/8
✓ Branch 0 taken 617612 times.
✓ Branch 1 taken 117155 times.
✓ Branch 2 taken 584834 times.
✓ Branch 3 taken 32778 times.
✓ Branch 4 taken 287768 times.
✓ Branch 5 taken 297066 times.
✓ Branch 6 taken 437701 times.
✗ Branch 7 not taken.
734767 if ((rule->diff[0] || rule->diff[1] || rule->diff[2]) && count < to_length) {
3654
2/2
✓ Branch 0 taken 117155 times.
✓ Branch 1 taken 320546 times.
437701 *to = rule->diff[0] ? dst->extra_ce_pri_base : 0;
3655 437701 to += to_stride;
3656
2/2
✓ Branch 0 taken 46742 times.
✓ Branch 1 taken 390959 times.
437701 *to = rule->diff[1] ? dst->extra_ce_sec_base : 0;
3657 437701 to += to_stride;
3658
2/2
✓ Branch 0 taken 371062 times.
✓ Branch 1 taken 66639 times.
437701 *to = rule->diff[2] ? dst->extra_ce_ter_base : 0;
3659 437701 to += to_stride;
3660 437701 total_ce_cnt++;
3661 437701 count += 3;
3662 }
3663 734767 total_ce_cnt =
3664 734767 std::min(total_ce_cnt, (MY_UCA_MAX_WEIGHT_SIZE - 1) / MY_UCA_900_CE_SIZE);
3665 734767 *to_num_ce = total_ce_cnt;
3666
3667 734767 return total_ce_cnt;
3668 }
3669
3670 /**
3671 Helper function:
3672 Copies UCA weights for a given "uint" string
3673 to the given location.
3674
3675 @param dst destination UCA weight data
3676 @param to destination address
3677 @param to_stride number of bytes between each successive weight in "to"
3678 @param to_length size of destination
3679 @param to_num_ce where to put the number of CEs generated
3680 @param rule The rule that contains the characters whose weight
3681 are to copied
3682 @param base_len The length of base character list
3683 @param uca_ver UCA version
3684
3685 @return number of weights put
3686 */
3687
3688 1250794 static size_t my_char_weight_put(MY_UCA_INFO *dst, uint16 *to, size_t to_stride,
3689 size_t to_length, uint16 *to_num_ce,
3690 const MY_COLL_RULE *rule, size_t base_len,
3691 enum_uca_ver uca_ver) {
3692
2/2
✓ Branch 0 taken 734767 times.
✓ Branch 1 taken 516027 times.
1250794 if (uca_ver == UCA_V900)
3693 734767 return my_char_weight_put_900(dst, to, to_stride, to_length, to_num_ce,
3694 734767 rule, base_len);
3695
3696 516027 const my_wc_t *base = rule->base;
3697 516027 size_t count = 0;
3698
2/2
✓ Branch 0 taken 525319 times.
✓ Branch 1 taken 516027 times.
1041346 while (base_len != 0) {
3699 525319 const uint16 *from = nullptr;
3700
3701
2/2
✓ Branch 0 taken 10608 times.
✓ Branch 1 taken 524115 times.
534723 for (size_t chlen = base_len; chlen > 1; chlen--) {
3702
2/2
✓ Branch 0 taken 1204 times.
✓ Branch 1 taken 9404 times.
10608 if ((from = my_uca_contraction_weight(dst->contraction_nodes, base,
3703 chlen))) {
3704 1204 base += chlen;
3705 1204 base_len -= chlen;
3706 1204 break;
3707 }
3708 }
3709
3710
2/2
✓ Branch 0 taken 524115 times.
✓ Branch 1 taken 1204 times.
525319 if (!from) {
3711 524115 from = my_char_weight_addr(dst, *base);
3712 524115 base++;
3713 524115 base_len--;
3714 }
3715
3716
4/6
✓ Branch 0 taken 1047646 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 522327 times.
✓ Branch 3 taken 525319 times.
✓ Branch 4 taken 522327 times.
✗ Branch 5 not taken.
1047646 for (; from && *from && count < to_length;) {
3717 522327 *to = *from++;
3718 522327 to += to_stride;
3719 522327 count++;
3720 }
3721 }
3722
3723 516027 *to = 0;
3724 516027 return count;
3725 }
3726
3727 /**
3728 Alloc new page and copy the default UCA weights
3729 @param cs Character set
3730 @param loader Character set loader
3731 @param src Default UCA data to copy from
3732 @param dst UCA data to copy weights to
3733 @param page page number
3734
3735 @retval false on success
3736 @retval true on error
3737 */
3738 81306 static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
3739 const MY_UCA_INFO *src, MY_UCA_INFO *dst,
3740 size_t page) {
3741 81306 const uint dst_size = 256 * dst->lengths[page] * sizeof(uint16);
3742
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 81306 times.
81306 if (!(dst->weights[page] = (uint16 *)(loader->once_alloc)(dst_size)))
3743 return true;
3744
3745
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 81306 times.
81306 assert(src->lengths[page] <= dst->lengths[page]);
3746 81306 memset(dst->weights[page], 0, dst_size);
3747
3/4
✓ Branch 0 taken 81306 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 50238 times.
✓ Branch 3 taken 31068 times.
81306 if (cs->uca && cs->uca->version == UCA_V900) {
3748 50238 const uint src_size = 256 * src->lengths[page] * sizeof(uint16);
3749 50238 memcpy(dst->weights[page], src->weights[page], src_size);
3750
2/2
✓ Branch 0 taken 30734 times.
✓ Branch 1 taken 334 times.
81306 } else if (src->lengths[page] > 0) {
3751
2/2
✓ Branch 0 taken 7867904 times.
✓ Branch 1 taken 30734 times.
7898638 for (uint chc = 0; chc < 256; chc++) {
3752 7867904 memcpy(dst->weights[page] + chc * dst->lengths[page],
3753 7867904 src->weights[page] + chc * src->lengths[page],
3754 7867904 src->lengths[page] * sizeof(uint16));
3755 }
3756 }
3757 81306 return false;
3758 }
3759
3760 /*
3761 This is used to apply the weight shift if there is a [before 1] rule.
3762 If we have a rule "&[before 1] A < B < C", and A's collation element is [P, S,
3763 T], then in my_char_weight_put_900(), we append one extra collation element to
3764 A's CE to be B and C's CE. So B and C's CE becomes [P, S, T][p, 0, 0]. What we
3765 do with this function is to change B's CE to [P - 1, S, T][p + n, 0, 0].
3766 1. The rule "&[before 1] A < B < C" means "B < C < A" on primary level. Since
3767 "B < A", so we give B the first primary weight as (P - 1).
3768 2. p is a weight value which is the maximum regular primary weight in DUCET
3769 plus one (0x54A3 + 1 = 0x54A4). This is to make sure B's primary weight
3770 less than A and greater than any character which sorts before A.
3771 3. n is the number of characters in this rule's character list. For the B in
3772 this rule, n = 1. For the C in this rule, n = 2. This can make sure "B <
3773 C".
3774
3775 It is the same thing that apply_secondary_shift_900() and
3776 apply_tertiary_shift_900() do, but on different weight levels.
3777 */
3778 38970 static bool apply_primary_shift_900(MY_CHARSET_LOADER *loader,
3779 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3780 uint16 *to, size_t to_stride,
3781 size_t nweights,
3782 uint16 *const last_weight_ptr) {
3783 /*
3784 Find the second-to-last non-ignorable primary weight to apply shift,
3785 because the last one is the extra CE we added in my_char_weight_put_900().
3786 */
3787 38970 int last_sec_pri = 0;
3788
1/2
✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
38970 for (last_sec_pri = nweights - 2; last_sec_pri >= 0; --last_sec_pri) {
3789
1/2
✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
38970 if (to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]) break;
3790 }
3791
1/2
✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
38970 if (last_sec_pri >= 0) {
3792 38970 to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]--; /* Reset before */
3793
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 38970 times.
38970 if (rules->shift_after_method == my_shift_method_expand) {
3794 /*
3795 Special case. Don't let characters shifted after X
3796 and before next(X) intermix to each other.
3797
3798 For example:
3799 "[shift-after-method expand] &0 < a &[before primary]1 < A".
3800 I.e. we reorder 'a' after '0', and then 'A' before '1'.
3801 'a' must be sorted before 'A'.
3802
3803 Note, there are no real collations in CLDR which shift
3804 after and before two neighbouring characters. We need this
3805 just in case. Reserving 4096 (0x1000) weights for such
3806 cases is perfectly enough.
3807 */
3808 /* W3-TODO: const may vary on levels 2,3*/
3809 last_weight_ptr[0] += 0x1000;
3810 }
3811 } else {
3812 loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
3813 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3814 return true;
3815 }
3816 38970 return false;
3817 }
3818
3819 /*
3820 This is used to apply the weight shift if there is a [before 2] rule. Please
3821 see the comment on apply_primary_shift_900().
3822 */
3823 8960 static bool apply_secondary_shift_900(MY_CHARSET_LOADER *loader,
3824 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3825 uint16 *to, size_t to_stride,
3826 size_t nweights,
3827 uint16 *const last_weight_ptr) {
3828 /*
3829 Find the second-to-last non-ignorable secondary weight to apply shift,
3830 because the last one is the extra CE we added in my_char_weight_put_900().
3831 */
3832 int last_sec_sec;
3833
1/2
✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
8960 for (last_sec_sec = nweights - 2; last_sec_sec >= 0; --last_sec_sec) {
3834
1/2
✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
8960 if (to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]) break;
3835 }
3836
1/2
✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
8960 if (last_sec_sec >= 0) {
3837 // Reset before.
3838 8960 to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]--;
3839
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8960 times.
8960 if (rules->shift_after_method == my_shift_method_expand) {
3840 /*
3841 Same reason as in apply_primary_shift_900(), reserve 256 (0x100)
3842 weights for secondary level.
3843 */
3844 last_weight_ptr[to_stride] += 0x100;
3845 }
3846 } else {
3847 loader->errcode = EE_FAILED_TO_RESET_BEFORE_SECONDARY_IGNORABLE_CHAR;
3848 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3849 return true;
3850 }
3851 8960 return false;
3852 }
3853
3854 /*
3855 This is used to apply the weight shift if there is a [before 3] rule. Please
3856 see the comment on apply_primary_shift_900().
3857 */
3858 204459 static bool apply_tertiary_shift_900(MY_CHARSET_LOADER *loader,
3859 MY_COLL_RULES *rules, MY_COLL_RULE *r,
3860 uint16 *to, size_t to_stride,
3861 size_t nweights,
3862 uint16 *const last_weight_ptr) {
3863 /*
3864 Find the second-to-last non-ignorable tertiary weight to apply shift,
3865 because the last one is the extra CE we added in my_char_weight_put_900().
3866 */
3867 int last_sec_ter;
3868
1/2
✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
204459 for (last_sec_ter = nweights - 2; last_sec_ter >= 0; --last_sec_ter) {
3869
1/2
✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
204459 if (to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride])
3870 204459 break;
3871 }
3872
1/2
✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
204459 if (last_sec_ter >= 0) {
3873 // Reset before.
3874 204459 to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride]--;
3875
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204459 times.
204459 if (rules->shift_after_method == my_shift_method_expand) {
3876 /*
3877 Same reason as in apply_primary_shift_900(), reserve 16 (0x10)
3878 weights for tertiary level.
3879 */
3880 last_weight_ptr[to_stride * 2] += 0x10;
3881 }
3882 } else {
3883 loader->errcode = EE_FAILED_TO_RESET_BEFORE_TERTIARY_IGNORABLE_CHAR;
3884 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3885 return true;
3886 }
3887 204459 return false;
3888 }
3889
3890 734767 static bool apply_shift_900(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
3891 MY_COLL_RULE *r, uint16 *to, size_t to_stride,
3892 size_t nweights) {
3893 // nweights should not be less than 1 because of the extra CE.
3894
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 734767 times.
734767 assert(nweights);
3895 // Apply level difference.
3896 734767 uint16 *const last_weight_ptr =
3897 734767 to + (nweights - 1) * to_stride * MY_UCA_900_CE_SIZE;
3898 734767 last_weight_ptr[0] += r->diff[0];
3899 734767 last_weight_ptr[to_stride] += r->diff[1];
3900 734767 last_weight_ptr[to_stride * 2] += r->diff[2];
3901
2/2
✓ Branch 0 taken 38970 times.
✓ Branch 1 taken 695797 times.
734767 if (r->before_level == 1) // Apply "&[before primary]".
3902 38970 return apply_primary_shift_900(loader, rules, r, to, to_stride, nweights,
3903 38970 last_weight_ptr);
3904
2/2
✓ Branch 0 taken 8960 times.
✓ Branch 1 taken 686837 times.
695797 else if (r->before_level == 2) // Apply "[before 2]".
3905 8960 return apply_secondary_shift_900(loader, rules, r, to, to_stride, nweights,
3906 8960 last_weight_ptr);
3907
2/2
✓ Branch 0 taken 204459 times.
✓ Branch 1 taken 482378 times.
686837 else if (r->before_level == 3) // Apply "[before 3]".
3908 204459 return apply_tertiary_shift_900(loader, rules, r, to, to_stride, nweights,
3909 204459 last_weight_ptr);
3910 482378 return false;
3911 }
3912
3913 1250794 static bool apply_shift(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
3914 MY_COLL_RULE *r, int level, uint16 *to,
3915 size_t to_stride, size_t nweights) {
3916
2/2
✓ Branch 0 taken 734767 times.
✓ Branch 1 taken 516027 times.
1250794 if (rules->uca->version == UCA_V900)
3917 734767 return apply_shift_900(loader, rules, r, to, to_stride, nweights);
3918
3919
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 516027 times.
516027 assert(to_stride == 1);
3920
3921 /* Apply level difference. */
3922
2/2
✓ Branch 0 taken 512747 times.
✓ Branch 1 taken 3280 times.
516027 if (nweights) {
3923 512747 to[nweights - 1] += r->diff[0];
3924
2/2
✓ Branch 0 taken 186 times.
✓ Branch 1 taken 512561 times.
512747 if (r->before_level == 1) /* Apply "&[before primary]" */
3925 {
3926
2/2
✓ Branch 0 taken 184 times.
✓ Branch 1 taken 2 times.
186 if (nweights >= 2) {
3927 184 to[nweights - 2]--; /* Reset before */
3928
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 128 times.
184 if (rules->shift_after_method == my_shift_method_expand) {
3929 /*
3930 Special case. Don't let characters shifted after X
3931 and before next(X) intermix to each other.
3932
3933 For example:
3934 "[shift-after-method expand] &0 < a &[before primary]1 < A".
3935 I.e. we reorder 'a' after '0', and then 'A' before '1'.
3936 'a' must be sorted before 'A'.
3937
3938 Note, there are no real collations in CLDR which shift
3939 after and before two neighbour characters. We need this
3940 just in case. Reserving 4096 (0x1000) weights for such
3941 cases is perfectly enough.
3942 */
3943 /* W3-TODO: const may vary on levels 2,3*/
3944 56 to[nweights - 1] += 0x1000;
3945 }
3946 } else {
3947 2 loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR;
3948 2 snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]);
3949 2 return true;
3950 }
3951 }
3952 } else {
3953 /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
3954
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3280 times.
3280 assert(to[0] == 0);
3955 3280 to[0] = r->diff[level];
3956 }
3957 516025 return false;
3958 }
3959
3960 267155 static MY_CONTRACTION *add_contraction_to_trie(
3961 std::vector<MY_CONTRACTION> *cont_nodes, MY_COLL_RULE *r) {
3962 267155 MY_CONTRACTION new_node{0, {}, {}, {}, false, 0};
3963
2/2
✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
267155 if (r->with_context) // previous-context contraction
3964 {
3965
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204479 times.
204479 assert(my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION) == 2);
3966 std::vector<MY_CONTRACTION>::iterator node_it =
3967
1/2
✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
204479 find_contraction_part_in_trie(*cont_nodes, r->curr[1]);
3968
6/6
✓ Branch 0 taken 203510 times.
✓ Branch 1 taken 969 times.
✓ Branch 2 taken 650 times.
✓ Branch 3 taken 202860 times.
✓ Branch 4 taken 1619 times.
✓ Branch 5 taken 202860 times.
204479 if (node_it == cont_nodes->end() || node_it->ch != r->curr[1]) {
3969 1619 new_node.ch = r->curr[1];
3970
1/2
✓ Branch 0 taken 1619 times.
✗ Branch 1 not taken.
1619 node_it = cont_nodes->insert(node_it, new_node);
3971 }
3972 204479 cont_nodes = &node_it->child_nodes_context;
3973
3974
1/2
✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
204479 node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[0]);
3975
6/6
✓ Branch 0 taken 175066 times.
✓ Branch 1 taken 29413 times.
✓ Branch 2 taken 173128 times.
✓ Branch 3 taken 1938 times.
✓ Branch 4 taken 202541 times.
✓ Branch 5 taken 1938 times.
204479 if (node_it == cont_nodes->end() || node_it->ch != r->curr[0]) {
3976 202541 new_node.ch = r->curr[0];
3977
1/2
✓ Branch 0 taken 202541 times.
✗ Branch 1 not taken.
202541 node_it = cont_nodes->insert(node_it, new_node);
3978 }
3979 204479 node_it->is_contraction_tail = true;
3980 204479 node_it->contraction_len = 2;
3981 204479 return &(*node_it);
3982 } else // normal contraction
3983 {
3984 62676 size_t contraction_len = my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
3985 62676 std::vector<MY_CONTRACTION>::iterator node_it;
3986
2/2
✓ Branch 0 taken 137833 times.
✓ Branch 1 taken 62676 times.
200509 for (size_t ch_ind = 0; ch_ind < contraction_len; ++ch_ind) {
3987
1/2
✓ Branch 0 taken 137833 times.
✗ Branch 1 not taken.
137833 node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[ch_ind]);
3988
6/6
✓ Branch 0 taken 77932 times.
✓ Branch 1 taken 59901 times.
✓ Branch 2 taken 42511 times.
✓ Branch 3 taken 35421 times.
✓ Branch 4 taken 102412 times.
✓ Branch 5 taken 35421 times.
137833 if (node_it == cont_nodes->end() || node_it->ch != r->curr[ch_ind]) {
3989 102412 new_node.ch = r->curr[ch_ind];
3990
1/2
✓ Branch 0 taken 102412 times.
✗ Branch 1 not taken.
102412 node_it = cont_nodes->insert(node_it, new_node);
3991 }
3992 137833 cont_nodes = &node_it->child_nodes;
3993 }
3994 62676 node_it->is_contraction_tail = true;
3995 62676 node_it->contraction_len = contraction_len;
3996 62676 return &(*node_it);
3997 }
3998 267155 }
3999
4000 1250794 static bool apply_one_rule(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4001 MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
4002 MY_UCA_INFO *dst) {
4003 size_t nweights;
4004 1250794 size_t nreset = my_coll_rule_reset_length(r); /* Length of reset sequence */
4005 1250794 size_t nshift = my_coll_rule_shift_length(r); /* Length of shift sequence */
4006 uint16 *to, *to_num_ce;
4007 size_t to_stride;
4008
4009
2/2
✓ Branch 0 taken 267155 times.
✓ Branch 1 taken 983639 times.
1250794 if (nshift >= 2) /* Contraction */
4010 {
4011 size_t i;
4012 int flag;
4013 /* Add HEAD, MID and TAIL flags for the contraction parts */
4014 267155 my_uca_add_contraction_flag(
4015 dst->contraction_flags, r->curr[0],
4016
2/2
✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
267155 r->with_context ? MY_UCA_PREVIOUS_CONTEXT_HEAD : MY_UCA_CNT_HEAD);
4017
2/2
✓ Branch 0 taken 12481 times.
✓ Branch 1 taken 267155 times.
279636 for (i = 1, flag = MY_UCA_CNT_MID1; i < nshift - 1; i++, flag <<= 1)
4018 12481 my_uca_add_contraction_flag(dst->contraction_flags, r->curr[i], flag);
4019 267155 my_uca_add_contraction_flag(
4020 dst->contraction_flags, r->curr[i],
4021
2/2
✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
267155 r->with_context ? MY_UCA_PREVIOUS_CONTEXT_TAIL : MY_UCA_CNT_TAIL);
4022 /* Add new contraction to the contraction list */
4023 MY_CONTRACTION *trie_node =
4024 267155 add_contraction_to_trie(dst->contraction_nodes, r);
4025 267155 to = trie_node->weight;
4026 267155 to_stride = 1;
4027 267155 to_num_ce = &to[MY_UCA_MAX_WEIGHT_SIZE - 1];
4028 /* Store weights of the "reset to" character */
4029 nweights =
4030 267155 my_char_weight_put(dst, to, to_stride, MY_UCA_MAX_WEIGHT_SIZE - 1,
4031 267155 to_num_ce, r, nreset, rules->uca->version);
4032 } else {
4033 983639 my_wc_t pagec = (r->curr[0] >> 8);
4034
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 983639 times.
983639 assert(dst->weights[pagec]);
4035
3/4
✓ Branch 0 taken 983639 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 493240 times.
✓ Branch 3 taken 490399 times.
983639 if (cs->uca && cs->uca->version == UCA_V900) {
4036 493240 to = my_char_weight_addr_900(dst, r->curr[0]);
4037 493240 to_stride = UCA900_DISTANCE_BETWEEN_LEVELS;
4038 493240 to_num_ce = to - UCA900_DISTANCE_BETWEEN_LEVELS;
4039 } else {
4040 490399 to = my_char_weight_addr(dst, r->curr[0]);
4041 490399 to_stride = 1;
4042 490399 to_num_ce = to + (dst->lengths[pagec] - 1);
4043 }
4044 /* Store weights of the "reset to" character */
4045
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 983639 times.
983639 if (dst->lengths[pagec] == 0)
4046 nweights = 0;
4047 else
4048 983639 nweights = my_char_weight_put(dst, to, to_stride, dst->lengths[pagec] - 1,
4049 983639 to_num_ce, r, nreset, rules->uca->version);
4050 }
4051
4052 1250794 change_weight_if_case_first(cs, dst, r, to, to_stride, nshift, nweights);
4053 /* Apply level difference. */
4054 1250794 return apply_shift(loader, rules, r, level, to, to_stride, nweights);
4055 }
4056
4057 /**
4058 Check if collation rules are valid,
4059 i.e. characters are not outside of the collation supported range.
4060 */
4061 29006 static int check_rules(MY_CHARSET_LOADER *loader, const MY_COLL_RULES *rules,
4062 const MY_UCA_INFO *dst, const MY_UCA_INFO *src) {
4063 const MY_COLL_RULE *r, *rlast;
4064
2/2
✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29006 times.
1279800 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
1250794 if (r->curr[0] > dst->maxchar) {
4066 loader->errcode = EE_SHIFT_CHAR_OUT_OF_RANGE;
4067 snprintf(loader->errarg, sizeof(loader->errarg), "u%04X",
4068 (uint)r->curr[0]);
4069 return true;
4070
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
1250794 } else if (r->base[0] > src->maxchar) {
4071 loader->errcode = EE_RESET_CHAR_OUT_OF_RANGE;
4072 snprintf(loader->errarg, sizeof(loader->errarg), "u%04X",
4073 (uint)r->base[0]);
4074 return true;
4075 }
4076 }
4077 29006 return false;
4078 }
4079
4080 9359 static void synthesize_lengths_900(uchar *lengths, const uint16 *const *weights,
4081 uint npages) {
4082
2/2
✓ Branch 0 taken 40730368 times.
✓ Branch 1 taken 9359 times.
40739727 for (uint page = 0; page < npages; ++page) {
4083 40730368 int max_len = 0;
4084
2/2
✓ Branch 0 taken 1396105 times.
✓ Branch 1 taken 39334263 times.
40730368 if (weights[page]) {
4085
2/2
✓ Branch 0 taken 357402880 times.
✓ Branch 1 taken 1396105 times.
358798985 for (uint code = 0; code < 256; ++code) {
4086 357402880 max_len = std::max<int>(max_len, weights[page][code]);
4087 }
4088 }
4089
2/2
✓ Branch 0 taken 39334263 times.
✓ Branch 1 taken 1396105 times.
40730368 if (max_len == 0)
4090 39334263 lengths[page] = 0;
4091 else
4092 1396105 lengths[page] = max_len * MY_UCA_900_CE_SIZE + 1;
4093 }
4094 9359 }
4095
4096 28846 static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
4097
3/4
✓ Branch 0 taken 28846 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9199 times.
✓ Branch 3 taken 19647 times.
28846 if (!cs->uca || cs->uca->version != UCA_V900 ||
4098
2/2
✓ Branch 0 taken 8876 times.
✓ Branch 1 taken 323 times.
9199 cs->coll_param != &ja_coll_param)
4099 28523 return;
4100
2/2
✓ Branch 0 taken 26486 times.
✓ Branch 1 taken 323 times.
26809 for (int page = MIN_JA_HAN_PAGE; page <= MAX_JA_HAN_PAGE; page++) {
4101 // In DUCET, weight is not assigned to code points in [U+4E00, U+9FFF].
4102 // When re-initializing (after my_coll_uninit_uca), the weights
4103 // may already be set.
4104
3/4
✓ Branch 0 taken 738 times.
✓ Branch 1 taken 25748 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 738 times.
26486 assert(dst->weights[page] == nullptr ||
4105 dst->weights[page] == ja_han_pages[page - MIN_JA_HAN_PAGE]);
4106 26486 dst->weights[page] = ja_han_pages[page - MIN_JA_HAN_PAGE];
4107 }
4108 }
4109
4110 /*
4111 We have reordered all the characters in the pages which contain Chinese Han
4112 characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the
4113 DUCET pages with these pages.
4114 */
4115 160 static void copy_zh_han_pages(MY_UCA_INFO *dst) {
4116
2/2
✓ Branch 0 taken 107680 times.
✓ Branch 1 taken 160 times.
107840 for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) {
4117
2/2
✓ Branch 0 taken 47200 times.
✓ Branch 1 taken 60480 times.
107680 if (zh_han_pages[page - MIN_ZH_HAN_PAGE]) {
4118 47200 dst->weights[page] = zh_han_pages[page - MIN_ZH_HAN_PAGE];
4119 }
4120 }
4121 160 }
4122
4123 /*
4124 UCA defines an algorithm to calculate character's implicit weight if this
4125 character's weight is not defined in the DUCET. This function is to help
4126 convert Chinese character's implicit weight calculated by UCA back to its code
4127 points.
4128 The implicit weight and the code point is not 1 : 1 map because DUCET lets
4129 some characters share implicit primary weight. For example, the DUCET defines
4130 "2F00 ; [.FB40.0020.0004][.CE00.0000.0000] # KANGXI RADICAL ONE", and 4E00's
4131 implicit weight is [.FB40.0020.0002][.CE00.0000.0000]. We can see the primary
4132 weights of U+2F00 and U+4E00 are same (FB40 CE00).
4133
4134 But for the Han characters in zh.xml file, each one has unique implicit
4135 weight.
4136 */
4137 498638 static inline my_wc_t convert_implicit_to_ch(uint16 first, uint16 second) {
4138 /*
4139 For reference, here is how UCA calculates one character's implicit weight.
4140 AAAA = 0xFB40 + (CP >> 15) # The 0xFB40 changes for different character
4141 # groups
4142 BBBB = (CP & 0x7FFF) | 0x8000
4143 */
4144
2/2
✓ Branch 0 taken 201280 times.
✓ Branch 1 taken 297358 times.
498638 if (first < 0xFB80)
4145 201280 return (((first - 0xFB40) << 15) | (second & 0x7FFF));
4146
2/2
✓ Branch 0 taken 28574 times.
✓ Branch 1 taken 268784 times.
297358 else if (first < 0xFBC0)
4147 28574 return (((first - 0xFB80) << 15) | (second & 0x7FFF));
4148 else
4149 268784 return (((first - 0xFBC0) << 15) | (second & 0x7FFF));
4150 }
4151
4152 /*
4153 Usually we do reordering in apply_reorder_param(). But for the Chinese
4154 collation, since we want to remove the weight gap between the character groups
4155 (see the comment on change_zh_implicit()), and we have done the reordering for
4156 some characters in the pages which contains Chinese Han characters, if we
4157 still use apply_reorder_param() to do the reordering for other characters, we
4158 might meet weight conflict. For example, in the DUCET page, 'A' has primary
4159 weight 0x1C47, but this value has been assigned to the first Chinese Han
4160 character in CLDR zh.xml file.
4161 So we do the reordering for all the DUCET pages when initializing the
4162 collation.
4163 */
4164 160 static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst,
4165 int npages) {
4166 160 std::map<int, int> zh_han_to_single_weight_map;
4167
2/2
✓ Branch 0 taken 6613760 times.
✓ Branch 1 taken 160 times.
6613920 for (int i = 0; i < ZH_HAN_WEIGHT_PAIRS; i++) {
4168 6613760 zh_han_to_single_weight_map[zh_han_to_single_weight[i * 2]] =
4169
1/2
✓ Branch 0 taken 6613760 times.
✗ Branch 1 not taken.
6613760 zh_han_to_single_weight[i * 2 + 1];
4170 }
4171
4172
2/2
✓ Branch 0 taken 696320 times.
✓ Branch 1 taken 160 times.
696480 for (int page = 0; page < npages; page++) {
4173 /*
4174 If there is no page in the DUCET, then all the characters in this page
4175 must have implicit weight. The reordering for it will be done by
4176 change_zh_implicit(). Do not need to change here.
4177 If there is a page in zh_han_pages[], then all the characters in this page
4178 have been reordered by uca9dump. Do not need to change here.
4179 */
4180
2/2
✓ Branch 0 taken 24716 times.
✓ Branch 1 taken 671604 times.
696320 if (!dst->weights[page] ||
4181
4/4
✓ Branch 0 taken 17356 times.
✓ Branch 1 taken 7360 times.
✓ Branch 2 taken 16556 times.
✓ Branch 3 taken 800 times.
24716 (page >= MIN_ZH_HAN_PAGE && page <= MAX_ZH_HAN_PAGE &&
4182
2/2
✓ Branch 0 taken 1356 times.
✓ Branch 1 taken 15200 times.
16556 zh_han_pages[page - MIN_ZH_HAN_PAGE]))
4183 672960 continue;
4184
2/2
✓ Branch 0 taken 5980160 times.
✓ Branch 1 taken 23360 times.
6003520 for (int off = 0; off < 256; off++) {
4185 5980160 uint16 *wbeg = UCA900_WEIGHT_ADDR(dst->weights[page], 0, off);
4186 5980160 int num_of_ce = UCA900_NUM_OF_CE(dst->weights[page], off);
4187
2/2
✓ Branch 0 taken 6612156 times.
✓ Branch 1 taken 5980160 times.
12592316 for (int ce = 0; ce < num_of_ce; ce++) {
4188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6612156 times.
6612156 assert(reorder_param->wt_rec_num == 1);
4189
2/2
✓ Branch 0 taken 4781389 times.
✓ Branch 1 taken 1830767 times.
6612156 if (*wbeg >= reorder_param->wt_rec[0].old_wt_bdy.begin &&
4190
2/2
✓ Branch 0 taken 3200440 times.
✓ Branch 1 taken 1580949 times.
4781389 *wbeg <= reorder_param->wt_rec[0].old_wt_bdy.end) {
4191 3200440 *wbeg = *wbeg + reorder_param->wt_rec[0].new_wt_bdy.begin -
4192 3200440 reorder_param->wt_rec[0].old_wt_bdy.begin;
4193
2/2
✓ Branch 0 taken 1469469 times.
✓ Branch 1 taken 1942247 times.
3411716 } else if (*wbeg >= 0xFB00) {
4194 1469469 uint16 next_wt = *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS);
4195
4/4
✓ Branch 0 taken 1469409 times.
✓ Branch 1 taken 60 times.
✓ Branch 2 taken 498638 times.
✓ Branch 3 taken 970771 times.
1469469 if (*wbeg >= 0xFB40 && *wbeg <= 0xFBC1) { // Han's implicit weight
4196 /*
4197 If some characters in DUCET share the same implicit weight, their
4198 reordered weight should be same too.
4199 */
4200 498638 my_wc_t ch = convert_implicit_to_ch(*wbeg, next_wt);
4201
1/2
✓ Branch 0 taken 498638 times.
✗ Branch 1 not taken.
498638 if (zh_han_to_single_weight_map.find(ch) !=
4202
2/2
✓ Branch 0 taken 215721 times.
✓ Branch 1 taken 282917 times.
997276 zh_han_to_single_weight_map.end()) {
4203
1/2
✓ Branch 0 taken 215721 times.
✗ Branch 1 not taken.
215721 *wbeg = zh_han_to_single_weight_map[ch];
4204 215721 *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS) = 0;
4205 215721 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4206 215721 ce++;
4207 215721 continue;
4208 }
4209 }
4210 1253748 *wbeg = change_zh_implicit(*wbeg);
4211 1253748 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4212 1253748 ce++;
4213 }
4214 6396435 wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS;
4215 }
4216 }
4217 }
4218 160 }
4219
4220 29006 static bool init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4221 MY_COLL_RULES *rules, int level, MY_UCA_INFO *dst,
4222 const MY_UCA_INFO *src,
4223 bool lengths_are_temporary) {
4224 MY_COLL_RULE *r, *rlast;
4225 29006 size_t i, npages = (src->maxchar + 1) / 256;
4226 29006 bool has_contractions = false;
4227
4228 29006 dst->maxchar = src->maxchar;
4229
4230
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 29006 times.
29006 if (check_rules(loader, rules, dst, src)) return true;
4231
4232 /* Allocate memory for pages and their lengths */
4233
2/2
✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
29006 if (lengths_are_temporary) {
4234
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
9359 if (!(dst->lengths = (uchar *)(loader->mem_malloc)(npages))) return true;
4235 9359 if (!(dst->weights =
4236
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
9359 (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *)))) {
4237 (loader->mem_free)(dst->lengths);
4238 return true;
4239 }
4240 } else {
4241
2/4
✓ Branch 0 taken 19647 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 19647 times.
39294 if (!(dst->lengths = (uchar *)(loader->once_alloc)(npages)) ||
4242 19647 !(dst->weights =
4243
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 19647 times.
19647 (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *))))
4244 return true;
4245 }
4246
4247 /*
4248 Copy pages lengths and page pointers from the default UCA weights.
4249 */
4250 29006 memcpy(dst->lengths, src->lengths, npages);
4251 29006 memcpy(dst->weights, src->weights, npages * sizeof(uint16 *));
4252
4253 /*
4254 Calculate maximum lengths for the pages which will be overwritten.
4255 Mark pages that will be overwritten as NULL.
4256 We'll allocate their own memory.
4257 */
4258
2/2
✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29006 times.
1279800 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4259
2/2
✓ Branch 0 taken 983639 times.
✓ Branch 1 taken 267155 times.
1250794 if (!r->curr[1]) /* If not a contraction */
4260 {
4261 983639 uint pagec = (r->curr[0] >> 8);
4262
2/2
✓ Branch 0 taken 218773 times.
✓ Branch 1 taken 764866 times.
983639 if (r->base[1]) /* Expansion */
4263 {
4264 /* Reserve space for maximum possible length */
4265 218773 dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
4266 } else {
4267 764866 uint pageb = (r->base[0] >> 8);
4268
6/6
✓ Branch 0 taken 251626 times.
✓ Branch 1 taken 513240 times.
✓ Branch 2 taken 162963 times.
✓ Branch 3 taken 88663 times.
✓ Branch 4 taken 53292 times.
✓ Branch 5 taken 109671 times.
764866 if ((r->diff[0] || r->diff[1] || r->diff[2]) &&
4269
2/2
✓ Branch 0 taken 37200 times.
✓ Branch 1 taken 617995 times.
655195 dst->lengths[pagec] < (src->lengths[pageb] + 3)) {
4270
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 37200 times.
37200 if ((src->lengths[pageb] + 3) > MY_UCA_MAX_WEIGHT_SIZE)
4271 dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE;
4272 else
4273 37200 dst->lengths[pagec] = src->lengths[pageb] + 3;
4274
2/2
✓ Branch 0 taken 648 times.
✓ Branch 1 taken 727018 times.
727666 } else if (dst->lengths[pagec] < src->lengths[pageb])
4275 648 dst->lengths[pagec] = src->lengths[pageb];
4276 }
4277 983639 dst->weights[pagec] = nullptr; /* Mark that we'll overwrite this page */
4278 } else
4279 267155 has_contractions = true;
4280 }
4281
4282
2/2
✓ Branch 0 taken 8844 times.
✓ Branch 1 taken 20162 times.
29006 if (has_contractions) {
4283 8844 dst->have_contractions = true;
4284
2/4
✓ Branch 0 taken 8844 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8844 times.
✗ Branch 3 not taken.
8844 dst->contraction_nodes = new std::vector<MY_CONTRACTION>(0);
4285 8844 if (!(dst->contraction_flags =
4286
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8844 times.
8844 (char *)(loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
4287 return true;
4288 8844 memset(dst->contraction_flags, 0, MY_UCA_CNT_FLAG_SIZE);
4289 }
4290
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 28846 times.
29006 if (cs->coll_param == &zh_coll_param) {
4291 /*
4292 We are going to reorder the weight of characters in uca pages when
4293 initializing this collation. And because of the reorder rule [reorder
4294 Hani], we need to change almost every character's weight. So copy all
4295 the pages.
4296 Please also see the comment on modify_all_zh_pages().
4297 */
4298 bool rc;
4299
2/2
✓ Branch 0 taken 696320 times.
✓ Branch 1 taken 160 times.
696480 for (i = 0; i < npages; i++) {
4300
4/6
✓ Branch 0 taken 24716 times.
✓ Branch 1 taken 671604 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 24716 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 696320 times.
696320 if (dst->lengths[i] && (rc = my_uca_copy_page(cs, loader, src, dst, i)))
4301 return rc;
4302 }
4303 160 modify_all_zh_pages(cs->coll_param->reorder_param, dst, npages);
4304 160 copy_zh_han_pages(dst);
4305 } else {
4306 /* Allocate pages that we'll overwrite and copy default weights */
4307
2/2
✓ Branch 0 taken 49044992 times.
✓ Branch 1 taken 28846 times.
49073838 for (i = 0; i < npages; i++) {
4308 bool rc;
4309 /*
4310 Don't touch pages with lengths[i]==0, they have implicit weights
4311 calculated algorithmically.
4312 */
4313
5/6
✓ Branch 0 taken 46520142 times.
✓ Branch 1 taken 2524850 times.
✓ Branch 2 taken 56590 times.
✓ Branch 3 taken 46463552 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 49044992 times.
49101582 if (!dst->weights[i] && dst->lengths[i] &&
4314
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 56590 times.
56590 (rc = my_uca_copy_page(cs, loader, src, dst, i)))
4315 return rc;
4316 }
4317
4318 28846 copy_ja_han_pages(cs, dst);
4319 }
4320
4321 /*
4322 Preparatory step is done at this point.
4323 Now we have memory allocated for the pages that we'll overwrite,
4324 and for contractions, including previous context contractions.
4325 Also, for the pages that we'll overwrite, we have copied default weights.
4326 Now iterate through the rules, overwrite weights for the characters
4327 that appear in the rules, and put all contractions into contraction list.
4328 */
4329
2/2
✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29004 times.
1279798 for (r = rules->rule; r < rlast; r++) {
4330
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1250792 times.
1250794 if (apply_one_rule(cs, loader, rules, r, level, dst)) return true;
4331 }
4332 29004 return false;
4333 }
4334
4335 /**
4336 Check whether the composition character is already in rule list
4337 @param rules The rule list
4338 @param wc The composition character
4339 @return true The composition character is already in list
4340 false The composition character is not in list
4341 */
4342 578607 static bool my_comp_in_rulelist(const MY_COLL_RULES *rules, my_wc_t wc) {
4343 MY_COLL_RULE *r, *rlast;
4344
2/2
✓ Branch 0 taken 100543160 times.
✓ Branch 1 taken 468377 times.
101011537 for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) {
4345
4/4
✓ Branch 0 taken 138654 times.
✓ Branch 1 taken 100404506 times.
✓ Branch 2 taken 110230 times.
✓ Branch 3 taken 28424 times.
100543160 if (r->curr[0] == wc && r->curr[1] == 0) return true;
4346 }
4347 468377 return false;
4348 }
4349
4350 /**
4351 Check whether a composition character in the decomposition list is a
4352 normal character.
4353 @param dec_ind The index of composition character in list
4354 @return Whether it is a normal character
4355 */
4356 1761683638 static inline bool my_compchar_is_normal_char(uint dec_ind) {
4357 1761683638 return uni_dec[dec_ind].decomp_tag == DECOMP_TAG_NONE;
4358 }
4359
4360 904076 static inline bool my_compchar_is_normal_char(const Unidata_decomp *decomp) {
4361 904076 return my_compchar_is_normal_char(decomp - std::begin(uni_dec));
4362 }
4363
4364 307721 static Unidata_decomp *get_decomposition(my_wc_t ch) {
4365 3883031 auto comp_func = [](Unidata_decomp x, Unidata_decomp y) {
4366 3883031 return x.charcode < y.charcode;
4367 };
4368 307721 Unidata_decomp to_find = {ch, CHAR_CATEGORY_LU, DECOMP_TAG_NONE, {0}};
4369
1/2
✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
307721 Unidata_decomp *decomp = std::lower_bound(
4370 std::begin(uni_dec), std::end(uni_dec), to_find, comp_func);
4371
5/6
✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 49383 times.
✓ Branch 3 taken 258338 times.
✓ Branch 4 taken 49383 times.
✓ Branch 5 taken 258338 times.
307721 if (decomp == std::end(uni_dec) || decomp->charcode != ch) return nullptr;
4372 258338 return decomp;
4373 }
4374
4375 3645724 static Combining_mark *my_find_combining_mark(my_wc_t code) {
4376 35679820 auto comp_func = [](Combining_mark x, Combining_mark y) {
4377 35679820 return x.charcode < y.charcode;
4378 };
4379 3645724 Combining_mark to_find = {code, 0};
4380
1/2
✓ Branch 0 taken 3645724 times.
✗ Branch 1 not taken.
3645724 return std::lower_bound(std::begin(combining_marks),
4381 7291448 std::end(combining_marks), to_find, comp_func);
4382 }
4383
4384 /**
4385 Check if a list of combining marks contains the whole list of origin
4386 decomposed combining marks.
4387 @param origin_dec The origin list of combining marks decomposed from
4388 character in tailoring rule.
4389 @param dec_codes The list of combining marks decomposed from
4390 character in decomposition list.
4391 @param dec_diff The combining marks exist in dec_codes but not in
4392 origin_dec.
4393 @return Whether the list of combining marks contains the
4394 whole list of origin combining marks.
4395 */
4396 284165408 static bool my_is_inheritance_of_origin(const my_wc_t *origin_dec,
4397 const my_wc_t *dec_codes,
4398 my_wc_t *dec_diff) {
4399 int ind0, ind1, ind2;
4400
2/2
✓ Branch 0 taken 282344219 times.
✓ Branch 1 taken 1821189 times.
284165408 if (origin_dec[0] != dec_codes[0]) return false;
4401
1/2
✓ Branch 0 taken 2710035 times.
✗ Branch 1 not taken.
4531224 for (ind0 = ind1 = ind2 = 1; ind0 < MY_UCA_MAX_CONTRACTION &&
4402 2710035 ind1 < MY_UCA_MAX_CONTRACTION &&
4403
5/6
✓ Branch 0 taken 2710035 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2456908 times.
✓ Branch 3 taken 253127 times.
✓ Branch 4 taken 2005938 times.
✓ Branch 5 taken 450970 times.
5420070 origin_dec[ind0] && dec_codes[ind1];) {
4404
2/2
✓ Branch 0 taken 183076 times.
✓ Branch 1 taken 1822862 times.
2005938 if (origin_dec[ind0] == dec_codes[ind1]) {
4405 183076 ind0++;
4406 183076 ind1++;
4407 } else {
4408 1822862 Combining_mark *mark0 = my_find_combining_mark(origin_dec[ind0]);
4409 1822862 Combining_mark *mark1 = my_find_combining_mark(dec_codes[ind1]);
4410
2/2
✓ Branch 0 taken 1117092 times.
✓ Branch 1 taken 705770 times.
1822862 if (mark0->ccc == mark1->ccc) return false;
4411 705770 dec_diff[ind2++] = dec_codes[ind1++];
4412 }
4413 }
4414
3/4
✓ Branch 0 taken 704097 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 253127 times.
✓ Branch 3 taken 450970 times.
704097 if (ind0 >= MY_UCA_MAX_CONTRACTION || !origin_dec[ind0]) {
4415
2/2
✓ Branch 0 taken 1075303 times.
✓ Branch 1 taken 253127 times.
1328430 while (ind1 < MY_UCA_MAX_CONTRACTION) {
4416 1075303 dec_diff[ind2++] = dec_codes[ind1++];
4417 }
4418 253127 return true;
4419 }
4420 450970 return false;
4421 }
4422
4423 /**
4424 Add new rules recursively if one rule's characters are in decomposition
4425 list.
4426 @param rules The rule list
4427 @param r The rule to check
4428 @param decomp_rec The decomposition of the character in rule.
4429 @param comp_added Bitset which marks whether the comp
4430 character has been added to rule list.
4431 @return 1 Error adding new rules
4432 0 Add rules successfully
4433 */
4434 307721 static int my_coll_add_inherit_rules(
4435 MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec,
4436 std::bitset<array_elements(uni_dec)> *comp_added) {
4437
2/2
✓ Branch 0 taken 1760779562 times.
✓ Branch 1 taken 307721 times.
1761087283 for (uint dec_ind = 0; dec_ind < array_elements(uni_dec); dec_ind++) {
4438 /*
4439 For normal character which can be decomposed, it is always decomposed to
4440 be another character and one combining mark.
4441
4442 Currently we only support the weight inheritance of character that can be
4443 canonical-decomposed to another character and a list of combining marks.
4444 So skip the compatibility decomposition.
4445
4446 Sample from UnicodeData.txt:
4447 Canonical decomposition: U+00DC : U+0055 U+0308
4448 Compatibility decomposition: U+FF59 : <wide> U+0079
4449 */
4450
9/10
✓ Branch 0 taken 633905260 times.
✓ Branch 1 taken 1126874302 times.
✓ Branch 2 taken 633905260 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 624302842 times.
✓ Branch 5 taken 9602418 times.
✓ Branch 6 taken 523209339 times.
✓ Branch 7 taken 101093503 times.
✓ Branch 8 taken 1476614154 times.
✓ Branch 9 taken 284165408 times.
2283988901 if (!my_compchar_is_normal_char(dec_ind) || comp_added->test(dec_ind) ||
4451 523209339 (decomp_rec != nullptr &&
4452
2/2
✓ Branch 0 taken 340137434 times.
✓ Branch 1 taken 183071905 times.
523209339 uni_dec[dec_ind].decomp_tag != decomp_rec->decomp_tag))
4453 1476614154 continue;
4454 /*
4455 In DUCET, all accented character's weight is defined as base
4456 character's weight followed by accent mark's weight. For example:
4457 00DC = 0055 + 0308
4458 0055 ; [.1E30.0020.0008] # LATIN CAPITAL LETTER U
4459 0308 ; [.0000.002B.0002] # COMBINING DIAERESIS
4460 00DC ; [.1E30.0020.0008][.0000.002B.0002] # LATIN CAPITAL LETTER U
4461 WITH DIAERESIS
4462 So the composition character's rule should be the same as the origin rule
4463 except for the change of the curr value.
4464 */
4465 284165408 my_wc_t dec_diff[MY_UCA_MAX_CONTRACTION]{r->curr[0], 0};
4466 284165408 my_wc_t orig_dec[MY_UCA_MAX_CONTRACTION]{0};
4467
2/2
✓ Branch 0 taken 101093503 times.
✓ Branch 1 taken 183071905 times.
284165408 if (decomp_rec == nullptr) {
4468 /*
4469 If there is no decomposition record found in Unidata_decomp, it means
4470 its decomposition form is itself.
4471 */
4472 101093503 orig_dec[0] = r->curr[0];
4473 } else {
4474 183071905 memcpy(orig_dec, decomp_rec->dec_codes, sizeof(orig_dec));
4475 }
4476
1/2
✓ Branch 0 taken 284165408 times.
✗ Branch 1 not taken.
284165408 if (my_is_inheritance_of_origin(orig_dec, uni_dec[dec_ind].dec_codes,
4477
4/4
✓ Branch 0 taken 253127 times.
✓ Branch 1 taken 283912281 times.
✓ Branch 2 taken 154431 times.
✓ Branch 3 taken 284010977 times.
284418535 dec_diff) &&
4478
2/2
✓ Branch 0 taken 154431 times.
✓ Branch 1 taken 98696 times.
253127 !my_comp_in_rulelist(rules, uni_dec[dec_ind].charcode)) {
4479 154431 MY_COLL_RULE newrule{{0}, {uni_dec[dec_ind].charcode, 0}, {0}, 0, false};
4480 154431 memcpy(newrule.base, dec_diff, sizeof(newrule.base));
4481
2/4
✓ Branch 0 taken 154431 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 154431 times.
154431 if (my_coll_rules_add(rules, &newrule)) return 1;
4482
1/2
✓ Branch 0 taken 154431 times.
✗ Branch 1 not taken.
154431 comp_added->set(dec_ind);
4483 }
4484 }
4485 307721 return 0;
4486 }
4487
4488 313946 static bool combining_mark_in_rulelist(const my_wc_t *dec_codes,
4489 const MY_COLL_RULE *r_start,
4490 const MY_COLL_RULE *r_end) {
4491
1/2
✓ Branch 0 taken 466416 times.
✗ Branch 1 not taken.
466416 for (int i = 1; i < MY_UCA_MAX_CONTRACTION; ++i) {
4492
2/2
✓ Branch 0 taken 279818 times.
✓ Branch 1 taken 186598 times.
466416 if (!*(dec_codes + i)) return false;
4493
2/2
✓ Branch 0 taken 12146882 times.
✓ Branch 1 taken 152470 times.
12299352 for (const MY_COLL_RULE *r = r_start; r < r_end; ++r) {
4494
2/2
✓ Branch 0 taken 34128 times.
✓ Branch 1 taken 12112754 times.
12146882 if (r->curr[0] == *(dec_codes + i)) {
4495 34128 return true;
4496 }
4497 }
4498 }
4499 return false;
4500 }
4501
4502 3023 static int add_normalization_rules(const CHARSET_INFO *cs,
4503 MY_COLL_RULES *rules) {
4504
3/4
✓ Branch 0 taken 3023 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2865 times.
✓ Branch 3 taken 158 times.
3023 if (!cs->coll_param || !cs->coll_param->norm_enabled) return 0;
4505 158 const int orig_rule_num = rules->nrules;
4506
2/2
✓ Branch 0 taken 904076 times.
✓ Branch 1 taken 158 times.
904234 for (Unidata_decomp *decomp = std::begin(uni_dec); decomp < std::end(uni_dec);
4507 ++decomp) {
4508
2/2
✓ Branch 0 taken 313946 times.
✓ Branch 1 taken 11534 times.
1229556 if (!my_compchar_is_normal_char(decomp) ||
4509
4/4
✓ Branch 0 taken 325480 times.
✓ Branch 1 taken 578596 times.
✓ Branch 2 taken 869948 times.
✓ Branch 3 taken 34128 times.
1229556 my_comp_in_rulelist(rules, decomp->charcode) ||
4510
2/2
✓ Branch 0 taken 279818 times.
✓ Branch 1 taken 34128 times.
313946 !combining_mark_in_rulelist(decomp->dec_codes, rules->rule,
4511 313946 rules->rule + orig_rule_num))
4512 869948 continue;
4513 34128 MY_COLL_RULE newrule{{0}, {decomp->charcode, 0}, {0}, 0, false};
4514 34128 memcpy(newrule.base, decomp->dec_codes, sizeof(newrule.base));
4515
2/4
✓ Branch 0 taken 34128 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 34128 times.
34128 if (my_coll_rules_add(rules, &newrule)) return 1;
4516 }
4517 158 return 0;
4518 }
4519
4520 /**
4521 For every rule in rule list, check and add new rules if it is in
4522 decomposition list.
4523 @param cs Character set info
4524 @param rules The rule list
4525 @return 1 Error happens when adding new rule
4526 0 Add rules successfully
4527 */
4528 29006 static int my_coll_check_rule_and_inherit(const CHARSET_INFO *cs,
4529 MY_COLL_RULES *rules) {
4530
2/2
✓ Branch 0 taken 19647 times.
✓ Branch 1 taken 9359 times.
29006 if (rules->uca->version != UCA_V900) return 0;
4531
4532 /*
4533 Character can combine with marks to be a new character. For example,
4534 A + [mark b] = A1, A1 + [mark c] = A2. We think the weight of A1 and
4535 A2 should shift with A if A is in rule list and its weight shifts,
4536 unless A1 / A2 is already in rule list.
4537 */
4538 9359 std::bitset<array_elements(uni_dec)> comp_added;
4539 9359 int orig_rule_num = rules->nrules;
4540
2/2
✓ Branch 0 taken 546208 times.
✓ Branch 1 taken 9359 times.
555567 for (int i = 0; i < orig_rule_num; ++i) {
4541 546208 MY_COLL_RULE r = *(rules->rule + i);
4542 /*
4543 Do not add inheritance rule for contraction.
4544 But for the Chinese collation, the weight shift rule of Chinese collation
4545 is a bit different from all the languages we added so far. For example, it
4546 has a rule "&e << ... << e\\u0302\\u0300". So far, if a language's rule
4547 involves 'e\\u0302\\u0300', it will use the combining form character,
4548 U+1EC1, and it is not a contraction. If we don't handle this for Chinese
4549 collation, it will skip some further rule inheriting.
4550 */
4551
4/4
✓ Branch 0 taken 462048 times.
✓ Branch 1 taken 84160 times.
✓ Branch 2 taken 238487 times.
✓ Branch 3 taken 223561 times.
546208 if (cs->coll_param != &zh_coll_param && r.curr[1]) continue;
4552
1/2
✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
307721 Unidata_decomp *decomp_rec = get_decomposition(r.curr[0]);
4553
2/4
✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 307721 times.
307721 if (my_coll_add_inherit_rules(rules, &r, decomp_rec, &comp_added)) return 1;
4554 }
4555 9359 return 0;
4556 }
4557
4558 /**
4559 Helper function to store weight boundary values.
4560 @param[out] wt_rec Weight boundary for each character group and gap
4561 between groups
4562 @param rec_ind The position from where to store weight boundary
4563 @param old_begin Beginning weight of character group before reorder
4564 @param old_end End weight of character group before reorder
4565 @param new_begin Beginning weight of character group after reorder
4566 @param new_end End weight of character group after reorder
4567 */
4568 5735 static inline void my_set_weight_rec(
4569 Reorder_wt_rec (&wt_rec)[2 * UCA_MAX_CHAR_GRP], int rec_ind,
4570 uint16 old_begin, uint16 old_end, uint16 new_begin, uint16 new_end) {
4571 5735 wt_rec[rec_ind] = {{old_begin, old_end}, {new_begin, new_end}};
4572 5735 }
4573
4574 /**
4575 Calculate the reorder parameters for the character groups.
4576 @param cs Character set info
4577 @param[out] rec_ind The position from where to store weight boundary
4578 */
4579 2230 static void my_calc_char_grp_param(const CHARSET_INFO *cs, int &rec_ind) {
4580 2230 int weight_start = START_WEIGHT_TO_REORDER;
4581 2230 int grp_ind = 0;
4582 2230 Reorder_param *param = cs->coll_param->reorder_param;
4583
1/2
✓ Branch 0 taken 5735 times.
✗ Branch 1 not taken.
5735 for (; grp_ind < UCA_MAX_CHAR_GRP; ++grp_ind) {
4584
2/2
✓ Branch 0 taken 2230 times.
✓ Branch 1 taken 3505 times.
5735 if (param->reorder_grp[grp_ind] == CHARGRP_NONE) break;
4585 9886 for (Char_grp_info *info = std::begin(char_grp_infos);
4586
1/2
✓ Branch 0 taken 9886 times.
✗ Branch 1 not taken.
9886 info < std::end(char_grp_infos); ++info) {
4587
2/2
✓ Branch 0 taken 6381 times.
✓ Branch 1 taken 3505 times.
9886 if (param->reorder_grp[grp_ind] != info->group) continue;
4588 3505 my_set_weight_rec(
4589 3505 param->wt_rec, grp_ind, info->grp_wt_bdy.begin, info->grp_wt_bdy.end,
4590 weight_start,
4591 3505 weight_start + info->grp_wt_bdy.end - info->grp_wt_bdy.begin);
4592 3505 weight_start = param->wt_rec[grp_ind].new_wt_bdy.end + 1;
4593 3505 break;
4594 }
4595 }
4596 2230 rec_ind = grp_ind;
4597 2230 }
4598
4599 /**
4600 Calculate the reorder parameters for the gap between character groups.
4601 @param cs Character set info
4602 @param rec_ind The position from where to store weight boundary
4603 */
4604 2230 static void my_calc_char_grp_gap_param(CHARSET_INFO *cs, int &rec_ind) {
4605 2230 Reorder_param *param = cs->coll_param->reorder_param;
4606 2230 uint16 weight_start = param->wt_rec[rec_ind - 1].new_wt_bdy.end + 1;
4607 2230 Char_grp_info *last_grp = nullptr;
4608 13380 for (Char_grp_info *info = std::begin(char_grp_infos);
4609
2/2
✓ Branch 0 taken 11150 times.
✓ Branch 1 taken 2230 times.
13380 info < std::end(char_grp_infos); ++info) {
4610
1/2
✓ Branch 0 taken 23895 times.
✗ Branch 1 not taken.
23895 for (int ind = 0; ind < UCA_MAX_CHAR_GRP; ++ind) {
4611
2/2
✓ Branch 0 taken 7645 times.
✓ Branch 1 taken 16250 times.
23895 if (param->reorder_grp[ind] == CHARGRP_NONE) break;
4612
2/2
✓ Branch 0 taken 12745 times.
✓ Branch 1 taken 3505 times.
16250 if (param->reorder_grp[ind] != info->group) continue;
4613
2/2
✓ Branch 0 taken 794 times.
✓ Branch 1 taken 2711 times.
3505 if (param->max_weight < info->grp_wt_bdy.end)
4614 794 param->max_weight = info->grp_wt_bdy.end;
4615 /*
4616 There might be some character groups before the first character
4617 group in our list.
4618 */
4619
4/4
✓ Branch 0 taken 2230 times.
✓ Branch 1 taken 1275 times.
✓ Branch 2 taken 955 times.
✓ Branch 3 taken 1275 times.
3505 if (!last_grp && info->grp_wt_bdy.begin > START_WEIGHT_TO_REORDER) {
4620 955 my_set_weight_rec(param->wt_rec, rec_ind, START_WEIGHT_TO_REORDER,
4621 955 info->grp_wt_bdy.begin - 1, weight_start,
4622 955 weight_start + (info->grp_wt_bdy.begin - 1) -
4623 START_WEIGHT_TO_REORDER);
4624 955 weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
4625 955 rec_ind++;
4626 }
4627 /* Gap between 2 character groups in our list. */
4628
3/4
✓ Branch 0 taken 1275 times.
✓ Branch 1 taken 2230 times.
✓ Branch 2 taken 1275 times.
✗ Branch 3 not taken.
3505 if (last_grp && last_grp->grp_wt_bdy.end < (info->grp_wt_bdy.begin - 1)) {
4629 1275 my_set_weight_rec(param->wt_rec, rec_ind, last_grp->grp_wt_bdy.end + 1,
4630 1275 info->grp_wt_bdy.begin - 1, weight_start,
4631 1275 weight_start + (info->grp_wt_bdy.begin - 1) -
4632 1275 (last_grp->grp_wt_bdy.end + 1));
4633 1275 weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1;
4634 1275 rec_ind++;
4635 }
4636 3505 last_grp = info;
4637 3505 break;
4638 }
4639 }
4640 2230 param->wt_rec_num = rec_ind;
4641 2230 }
4642
4643 /**
4644 Prepare reorder parameters.
4645 @param cs Character set info
4646 */
4647 3023 static int my_prepare_reorder(CHARSET_INFO *cs) {
4648 /*
4649 Chinese collation's reordering is done in next_implicit() and
4650 modify_all_zh_pages(). See the comment on zh_reorder_param and
4651 change_zh_implicit().
4652 */
4653
4/4
✓ Branch 0 taken 2390 times.
✓ Branch 1 taken 633 times.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 2230 times.
3023 if (!cs->coll_param->reorder_param || cs->coll_param == &zh_coll_param)
4654 793 return 0;
4655 /*
4656 For each group of character, for example, latin characters,
4657 their weights are in a separate range. The default sequence
4658 of these groups is: Latin, Greek, Coptic, Cyrillic, and so
4659 on. Some languages want to change the default sequence. For
4660 example, Croatian wants to put Cyrillic to just behind Latin.
4661 We need to reorder the character groups and change their
4662 weight accordingly. Here we calculate the parameters needed
4663 for weight change. And the change will happen when weight
4664 returns from strnxfrm.
4665 */
4666 2230 int rec_ind = 0;
4667 2230 my_calc_char_grp_param(cs, rec_ind);
4668 2230 my_calc_char_grp_gap_param(cs, rec_ind);
4669 2230 return rec_ind;
4670 }
4671
4672 323 static void adjust_japanese_weight(CHARSET_INFO *cs, int rec_ind) {
4673 /*
4674 Per CLDR 30, Japanese collations need to reorder characters as
4675 [Latin, Kana, Han, others]. So for the original character group list:
4676 [Latin, CharA, Kana, CharB, Han, Others], it should be reordered as
4677 [Latin, Kana, Han, CharA, CharB, Others]. But my_prepare_reorder()
4678 reorders original group to be [Latin, Kana, CharA, CharB, Han, Others].
4679 This is because Han characters are different from others in that Han
4680 characters' weight is implicit and has two primary weights for each
4681 character. Other characters have only one primary weight for each (base)
4682 character. Han characters always sort bigger.
4683
4684 CLDR defines the collating order for 6355 Japanese Han characters. All
4685 of them are in [U+4E00, U+9FFF]; we give them tailored primary weights
4686 in ja_han_pages. The tailored primary weights are just after Kana,
4687 because these characters are very common. These Han characters' weight
4688 pages will be added to collation's UCA data in copy_ja_han_pages().
4689 For the other Han characters, we don't change their implicit weights,
4690 which is [FB80 - FB85, 0020, 0002][XXXX, 0000, 0000].
4691
4692 To make sure CharA and CharB's weight is greater than all Han characters,
4693 we give them weight as [FB86, 0000, 0000][origin weights]. This will be
4694 done in apply_reorder_param().
4695
4696 Because the values stored in last wt_rec element is calculated for moving
4697 CharA to be after Kana, but we want them to be after all Han character,
4698 we reset the weight boundary here, and will change all these characters'
4699 weight in apply_reorder_param().
4700 */
4701 323 Reorder_param *param = cs->coll_param->reorder_param;
4702 323 param->wt_rec[rec_ind - 1].new_wt_bdy.begin = 0;
4703 323 param->wt_rec[rec_ind - 1].new_wt_bdy.end = 0;
4704 323 param->wt_rec[rec_ind].old_wt_bdy.begin = param->wt_rec[1].old_wt_bdy.end + 1;
4705 323 param->wt_rec[rec_ind].old_wt_bdy.end = 0x54A3;
4706 323 param->wt_rec[rec_ind].new_wt_bdy.begin = 0;
4707 323 param->wt_rec[rec_ind].new_wt_bdy.end = 0;
4708 323 param->wt_rec_num++;
4709 323 param->max_weight = 0x54A3;
4710 323 }
4711
4712 /**
4713 Prepare parametric tailoring, like reorder, etc.
4714 @param cs Character set info
4715 @param rules Collation rule list to add to.
4716 @return false Collation parameters applied successfully.
4717 true Error happened.
4718 */
4719 29006 static bool my_prepare_coll_param(CHARSET_INFO *cs, MY_COLL_RULES *rules) {
4720
4/4
✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
✓ Branch 2 taken 6336 times.
✓ Branch 3 taken 3023 times.
29006 if (rules->uca->version != UCA_V900 || !cs->coll_param) return false;
4721
4722 3023 int rec_ind = my_prepare_reorder(cs);
4723
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3023 times.
3023 if (add_normalization_rules(cs, rules)) return true;
4724
4725
2/2
✓ Branch 0 taken 323 times.
✓ Branch 1 taken 2700 times.
3023 if (cs->coll_param == &ja_coll_param) adjust_japanese_weight(cs, rec_ind);
4726 /* Might add other parametric tailoring rules later. */
4727 3023 return false;
4728 }
4729
4730 /*
4731 This function copies an UCS2 collation from
4732 the default Unicode Collation Algorithm (UCA)
4733 weights applying tailorings, i.e. a set of
4734 alternative weights for some characters.
4735
4736 The default UCA weights are stored in uca_weight/uca_length.
4737 They consist of 256 pages, 256 character each.
4738
4739 If a page is not overwritten by tailoring rules,
4740 it is copied as is from UCA.
4741
4742 If a page contains some overwritten characters, it is
4743 allocated. Untouched characters are copied from the
4744 default weights.
4745 */
4746
4747 136555 static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) {
4748
2/2
✓ Branch 0 taken 107546 times.
✓ Branch 1 taken 29009 times.
136555 if (!cs->tailoring)
4749 107546 return false; /* Ok to add a collation without tailoring */
4750
4751 MY_COLL_RULES rules;
4752 29009 MY_UCA_INFO new_uca, *src_uca = nullptr;
4753 29009 int rc = 0;
4754 MY_UCA_INFO *src, *dst;
4755 size_t npages;
4756 bool lengths_are_temporary;
4757
4758 29009 loader->errcode = 0;
4759 29009 *loader->errarg = '\0';
4760
4761 29009 memset(&rules, 0, sizeof(rules));
4762 29009 rules.loader = loader;
4763
1/2
✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
29009 rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
4764 29009 memset(&new_uca, 0, sizeof(new_uca));
4765
4766 /* Parse ICU Collation Customization expression */
4767
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 29006 times.
29009 if ((rc = my_coll_rule_parse(&rules, cs->tailoring,
4768
1/2
✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
29009 cs->tailoring + strlen(cs->tailoring),
4769 cs->m_coll_name)))
4770 3 goto ex;
4771
4772
2/4
✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29006 times.
29006 if ((rc = my_coll_check_rule_and_inherit(cs, &rules))) goto ex;
4773
4774
2/4
✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29006 times.
29006 if ((rc = my_prepare_coll_param(cs, &rules))) goto ex;
4775
4776
2/2
✓ Branch 0 taken 972 times.
✓ Branch 1 taken 28034 times.
29006 if (rules.uca->version == UCA_V520) /* Unicode-5.2.0 requested */
4777 {
4778 972 src_uca = &my_uca_v520;
4779 972 cs->caseinfo = &my_unicase_unicode520;
4780
2/2
✓ Branch 0 taken 18675 times.
✓ Branch 1 taken 9359 times.
28034 } else if (rules.uca->version == UCA_V400) /* Unicode-4.0.0 requested */
4781 {
4782 18675 src_uca = &my_uca_v400;
4783
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18675 times.
18675 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4784 } else /* No Unicode version specified */
4785 {
4786
1/2
✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
9359 src_uca = cs->uca ? cs->uca : &my_uca_v400;
4787
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
9359 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4788 }
4789
4790 /*
4791 For UCA 9.0.0, we don't have a length page, but we still create one
4792 temporarily so that we can keep track of how much memory we need to
4793 allocate for weights.
4794 */
4795 29006 src = src_uca;
4796 29006 dst = &new_uca;
4797
4798 29006 dst->extra_ce_pri_base = cs->uca->extra_ce_pri_base;
4799 29006 dst->extra_ce_sec_base = cs->uca->extra_ce_sec_base;
4800 29006 dst->extra_ce_ter_base = cs->uca->extra_ce_ter_base;
4801
4/4
✓ Branch 0 taken 3023 times.
✓ Branch 1 taken 25983 times.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 2863 times.
29006 if (cs->coll_param && cs->coll_param == &zh_coll_param) {
4802 160 dst->extra_ce_pri_base = ZH_EXTRA_CE_PRI;
4803 }
4804
4805 29006 npages = (src->maxchar + 1) / 256;
4806
2/2
✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
29006 if (rules.uca->version == UCA_V900) {
4807
2/4
✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9359 times.
9359 if (!(src->lengths = (uchar *)(loader->mem_malloc)(npages))) goto ex;
4808
1/2
✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
9359 synthesize_lengths_900(src->lengths, src->weights, npages);
4809 }
4810
4811 29006 lengths_are_temporary = (rules.uca->version == UCA_V900);
4812
3/4
✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 29004 times.
29006 if ((rc = init_weight_level(cs, loader, &rules, 0, dst, src,
4813 lengths_are_temporary)))
4814 2 goto ex;
4815
4816
2/2
✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19645 times.
29004 if (lengths_are_temporary) {
4817
1/2
✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
9359 (loader->mem_free)(src->lengths);
4818
1/2
✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
9359 (loader->mem_free)(dst->lengths);
4819 9359 src->lengths = nullptr;
4820 9359 dst->lengths = nullptr;
4821 }
4822
4823 29004 new_uca.version = src_uca->version;
4824
2/4
✓ Branch 0 taken 29004 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29004 times.
29004 if (!(cs->uca = (MY_UCA_INFO *)(loader->once_alloc)(sizeof(MY_UCA_INFO)))) {
4825 rc = 1;
4826 goto ex;
4827 }
4828 29004 memset(cs->uca, 0, sizeof(MY_UCA_INFO));
4829 29004 cs->uca[0] = new_uca;
4830
4831 29009 ex:
4832
1/2
✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
29009 (loader->mem_free)(rules.rule);
4833
3/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 29004 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
29009 if (rc != 0 && loader->errcode) {
4834
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
5 if (new_uca.contraction_nodes) delete new_uca.contraction_nodes;
4835
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 loader->reporter(ERROR_LEVEL, loader->errcode, loader->errarg);
4836 }
4837 29009 return rc;
4838 }
4839
4840 19367193 static void my_coll_uninit_uca(CHARSET_INFO *cs) {
4841
4/4
✓ Branch 0 taken 7215187 times.
✓ Branch 1 taken 12152006 times.
✓ Branch 2 taken 8842 times.
✓ Branch 3 taken 7206345 times.
19367193 if (cs->uca && cs->uca->contraction_nodes) {
4842
1/2
✓ Branch 0 taken 8842 times.
✗ Branch 1 not taken.
8842 delete cs->uca->contraction_nodes;
4843 8842 cs->uca->contraction_nodes = nullptr;
4844 8842 cs->state &= ~MY_CS_READY;
4845 }
4846 19367193 }
4847 /*
4848 Universal CHARSET_INFO compatible wrappers
4849 for the above internal functions.
4850 Should work for any character set.
4851 */
4852
4853 extern "C" {
4854 136555 static bool my_coll_init_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) {
4855 136555 cs->pad_char = ' ';
4856 136555 cs->ctype = my_charset_utf8_unicode_ci.ctype;
4857
2/2
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 136516 times.
136555 if (!cs->caseinfo) cs->caseinfo = &my_unicase_default;
4858
2/2
✓ Branch 0 taken 18680 times.
✓ Branch 1 taken 117875 times.
136555 if (!cs->uca) cs->uca = &my_uca_v400;
4859 136555 return create_tailoring(cs, loader);
4860 }
4861
4862 80 static int my_strnncoll_any_uca(const CHARSET_INFO *cs, const uchar *s,
4863 size_t slen, const uchar *t, size_t tlen,
4864 bool t_is_prefix) {
4865
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 26 times.
80 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4866
1/2
✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
54 return my_strnncoll_uca<uca_scanner_any<Mb_wc_utf8mb4>, 1>(
4867 54 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4868 }
4869
4870 26 Mb_wc_through_function_pointer mb_wc(cs);
4871
1/2
✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
26 return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>(
4872 26 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4873 }
4874
4875 264540 static int my_strnncollsp_any_uca(const CHARSET_INFO *cs, const uchar *s,
4876 size_t slen, const uchar *t, size_t tlen) {
4877
2/2
✓ Branch 0 taken 145742 times.
✓ Branch 1 taken 118798 times.
264540 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4878
1/2
✓ Branch 0 taken 145742 times.
✗ Branch 1 not taken.
145742 return my_strnncollsp_uca(cs, Mb_wc_utf8mb4(), s, slen, t, tlen);
4879 }
4880
4881 118798 Mb_wc_through_function_pointer mb_wc(cs);
4882
1/2
✓ Branch 0 taken 118811 times.
✗ Branch 1 not taken.
118796 return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen);
4883 }
4884
4885 2324 static void my_hash_sort_any_uca(const CHARSET_INFO *cs, const uchar *s,
4886 size_t slen, uint64 *n1, uint64 *n2) {
4887
2/2
✓ Branch 0 taken 648 times.
✓ Branch 1 taken 1676 times.
2324 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4888 648 my_hash_sort_uca(cs, Mb_wc_utf8mb4(), s, slen, n1, n2);
4889 } else {
4890 1676 Mb_wc_through_function_pointer mb_wc(cs);
4891
1/2
✓ Branch 0 taken 1676 times.
✗ Branch 1 not taken.
1676 my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2);
4892 }
4893 2324 }
4894
4895 248749863 static size_t my_strnxfrm_any_uca(const CHARSET_INFO *cs, uchar *dst,
4896 size_t dstlen, uint num_codepoints,
4897 const uchar *src, size_t srclen, uint flags) {
4898
2/2
✓ Branch 0 taken 80229934 times.
✓ Branch 1 taken 168519929 times.
248749863 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4899
1/2
✓ Branch 0 taken 80229934 times.
✗ Branch 1 not taken.
80229934 return my_strnxfrm_uca(cs, Mb_wc_utf8mb4(), dst, dstlen, num_codepoints,
4900 80229934 src, srclen, flags);
4901 }
4902
4903 168519929 Mb_wc_through_function_pointer mb_wc(cs);
4904
1/2
✓ Branch 0 taken 168519929 times.
✗ Branch 1 not taken.
168519929 return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen,
4905 168519929 flags);
4906 }
4907
4908 5211575177 static int my_strnncoll_uca_900(const CHARSET_INFO *cs, const uchar *s,
4909 size_t slen, const uchar *t, size_t tlen,
4910 bool t_is_prefix) {
4911
1/2
✓ Branch 0 taken 5211600671 times.
✗ Branch 1 not taken.
5211575177 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
4912
3/5
✓ Branch 0 taken 5211594076 times.
✓ Branch 1 taken 4584 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2944 times.
✗ Branch 4 not taken.
5211600671 switch (cs->levels_for_compare) {
4913 5211594076 case 1:
4914
1/2
✓ Branch 0 taken 5211879234 times.
✗ Branch 1 not taken.
5211594076 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1>(
4915 5211879234 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4916 4584 case 2:
4917
1/2
✓ Branch 0 taken 4584 times.
✗ Branch 1 not taken.
4584 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 2>, 2>(
4918 4584 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4919 default:
4920 assert(false);
4921 2944 case 3:
4922
1/2
✓ Branch 0 taken 2944 times.
✗ Branch 1 not taken.
2944 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 3>, 3>(
4923 2944 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4924 745 case 4:
4925
1/2
✓ Branch 0 taken 62 times.
✗ Branch 1 not taken.
745 return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 4>, 4>(
4926 62 cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix);
4927 }
4928 }
4929
4930 Mb_wc_through_function_pointer mb_wc(cs);
4931 switch (cs->levels_for_compare) {
4932 case 1:
4933 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 1>, 1>(
4934 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4935 case 2:
4936 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 2>, 2>(
4937 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4938 default:
4939 assert(false);
4940 case 3:
4941 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 3>, 3>(
4942 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4943 case 4:
4944 return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 4>, 4>(
4945 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
4946 }
4947 }
4948
4949 812773129 static int my_strnncollsp_uca_900(const CHARSET_INFO *cs, const uchar *s,
4950 size_t slen, const uchar *t, size_t tlen) {
4951 // We are a NO PAD collation, so this is identical to strnncoll.
4952 812773129 return my_strnncoll_uca_900(cs, s, slen, t, tlen, false);
4953 }
4954
4955 } // extern "C"
4956
4957 template <class Mb_wc, int LEVELS_FOR_COMPARE>
4958 303824342 static void my_hash_sort_uca_900_tmpl(const CHARSET_INFO *cs, const Mb_wc mb_wc,
4959 const uchar *s, size_t slen, uint64 *n1) {
4960 303824342 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, s, slen);
4961
4962 /*
4963 A variation of the FNV-1a hash. The differences between this and
4964 standard FNV-1a as described in literature are:
4965
4966 - We work naturally on 16-bit weights, so we XOR in the entire weight
4967 instead of hashing byte-by-byte. (This is effectively a speed/quality
4968 tradeoff, as it will reduce avalanche.)
4969 - We use the n1 seed by XOR-ing it onto the offset basis; FNV-1a as
4970 typically described does not use a seed. This should be safe, since
4971 there's nothing magical about the offset basis; it's just the FNV-1a
4972 hash of some human-readable text.
4973
4974 This is nowhere near a perfect hash function; it has suboptimal avalanche
4975 characteristics, and it is not multicollision resistant. In particular,
4976 it fails many SMHasher tests, mostly for bias (collision tests are fine).
4977 However, it is of much better quality than the home-grown hash used
4978 for other collations (which fails _all_ SMHasher tests), while being
4979 much faster.
4980
4981 We ignore the n2 seed entirely, since we don't need it. The caller is
4982 responsible for doing hash folding at the end; we can't do that.
4983
4984 See http://isthe.com/chongo/tech/comp/fnv/#FNV-param for constants.
4985 */
4986
4987 303824372 uint64 h = *n1;
4988 303824372 h ^= 14695981039346656037ULL;
4989
4990
2/2
✓ Branch 0 taken 151912094 times.
✓ Branch 1 taken 92 times.
303824372 scanner.for_each_weight(
4991 2116889324 [&](int s_res, bool) -> bool {
4992 2116889324 h ^= s_res;
4993 2116889324 h *= 1099511628211ULL;
4994 2116889324 return true;
4995 },
4996 473260726 [](int) { return true; });
4997
4998 303824542 *n1 = h;
4999 }
5000
5001 extern "C" {
5002
5003 151911974 static void my_hash_sort_uca_900(const CHARSET_INFO *cs, const uchar *s,
5004 size_t slen, uint64 *n1, uint64 *) {
5005
1/2
✓ Branch 0 taken 151912101 times.
✗ Branch 1 not taken.
151911974 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
5006
3/5
✓ Branch 0 taken 151912143 times.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 141 times.
✗ Branch 4 not taken.
151912101 switch (cs->levels_for_compare) {
5007 151912143 case 1:
5008
1/2
✓ Branch 0 taken 151912328 times.
✗ Branch 1 not taken.
151912143 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 1>(cs, Mb_wc_utf8mb4(),
5009 151912328 s, slen, n1);
5010 14 case 2:
5011
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 2>(cs, Mb_wc_utf8mb4(),
5012 14 s, slen, n1);
5013 default:
5014 assert(false);
5015 141 case 3:
5016
1/2
✓ Branch 0 taken 141 times.
✗ Branch 1 not taken.
141 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 3>(cs, Mb_wc_utf8mb4(),
5017 141 s, slen, n1);
5018 case 4:
5019 return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 4>(cs, Mb_wc_utf8mb4(),
5020 38 s, slen, n1);
5021 }
5022 }
5023
5024 Mb_wc_through_function_pointer mb_wc(cs);
5025 switch (cs->levels_for_compare) {
5026 case 1:
5027 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 1>(cs, mb_wc, s, slen,
5028 n1);
5029 case 2:
5030 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 2>(cs, mb_wc, s, slen,
5031 n1);
5032 default:
5033 assert(false);
5034 case 3:
5035 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 3>(cs, mb_wc, s, slen,
5036 n1);
5037 case 4:
5038 return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 4>(cs, mb_wc, s, slen,
5039 n1);
5040 }
5041 }
5042
5043 } // extern "C"
5044
5045 /*
5046 Check if a constant can be propagated
5047
5048 Currently we don't check the constant itself, and decide not to propagate
5049 a constant just if the collation itself allows expansions or contractions.
5050 */
5051 189343 bool my_propagate_uca_900(const CHARSET_INFO *cs,
5052 const uchar *str [[maybe_unused]],
5053 size_t length [[maybe_unused]]) {
5054 189343 return !my_uca_have_contractions(cs->uca);
5055 }
5056
5057 template <class Mb_wc, int LEVELS_FOR_COMPARE>
5058 494915754 static size_t my_strnxfrm_uca_900_tmpl(const CHARSET_INFO *cs,
5059 const Mb_wc mb_wc, uchar *dst,
5060 size_t dstlen, const uchar *src,
5061 size_t srclen, uint flags) {
5062 494915754 uchar *d0 = dst;
5063 494915754 uchar *dst_end = dst + dstlen;
5064 494915754 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, src, srclen);
5065
5066
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 247459484 times.
494916080 assert((dstlen % 2) == 0);
5067
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 247459484 times.
494918968 if ((dstlen % 2) == 1) {
5068 // Emergency workaround for optimized mode.
5069 --dst_end;
5070 }
5071
5072
2/2
✓ Branch 0 taken 247458029 times.
✓ Branch 1 taken 1455 times.
494918968 if (dst != dst_end) {
5073
2/2
✓ Branch 0 taken 50244123 times.
✓ Branch 1 taken 197213906 times.
989826970 scanner.for_each_weight(
5074 6200149933 [&dst, dst_end](int s_res,
5075 bool is_level_separator [[maybe_unused]]) -> bool {
5076
4/16
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 20287995 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 624007531 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 13394654 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1409026496 times.
2066716676 assert(is_level_separator == (s_res == 0));
5077
1/4
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1409026496 times.
1409026496 if (LEVELS_FOR_COMPARE == 1) assert(!is_level_separator);
5078
5079 2066716676 dst = store16be(dst, s_res);
5080 2066716581 return (dst < dst_end);
5081 },
5082 313245093 [&dst, dst_end](int num_weights) {
5083 313245093 return (dst < dst_end - num_weights * 2);
5084 });
5085 }
5086
5087
2/2
✓ Branch 0 taken 79838 times.
✓ Branch 1 taken 247377073 times.
494913822 if (flags & MY_STRXFRM_PAD_TO_MAXLEN) {
5088 159676 memset(dst, 0, dst_end - dst);
5089 159676 dst = dst_end;
5090 }
5091
5092 494913822 return dst - d0;
5093 }
5094
5095 extern "C" {
5096
5097 247457290 static size_t my_strnxfrm_uca_900(const CHARSET_INFO *cs, uchar *dst,
5098 size_t dstlen,
5099 uint num_codepoints [[maybe_unused]],
5100 const uchar *src, size_t srclen, uint flags) {
5101
1/2
✓ Branch 0 taken 247457530 times.
✗ Branch 1 not taken.
247457290 if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) {
5102
4/5
✓ Branch 0 taken 137154357 times.
✓ Branch 1 taken 3344873 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 103615568 times.
✓ Branch 4 taken 3342732 times.
247457530 switch (cs->levels_for_compare) {
5103 137154357 case 1:
5104 137154357 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 1>(
5105 137153061 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5106 3344873 case 2:
5107 3344873 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 2>(
5108 3344873 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5109 default:
5110 assert(false);
5111 103615568 case 3:
5112 103615568 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 3>(
5113 103615568 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5114 3342732 case 4:
5115 3342732 return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 4>(
5116 3342738 cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags);
5117 }
5118 } else {
5119 Mb_wc_through_function_pointer mb_wc(cs);
5120 switch (cs->levels_for_compare) {
5121 case 1:
5122 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 1>(
5123 cs, mb_wc, dst, dstlen, src, srclen, flags);
5124 case 2:
5125 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 2>(
5126 cs, mb_wc, dst, dstlen, src, srclen, flags);
5127 default:
5128 assert(false);
5129 case 3:
5130 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 3>(
5131 cs, mb_wc, dst, dstlen, src, srclen, flags);
5132 case 4:
5133 return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 4>(
5134 cs, mb_wc, dst, dstlen, src, srclen, flags);
5135 }
5136 }
5137 }
5138
5139 1303790 static size_t my_strnxfrmlen_uca_900(const CHARSET_INFO *cs, size_t len) {
5140 /*
5141 The character with the most weights is U+FDFA ARABIC LIGATURE SALLALLAHOU
5142 ALAYHE WASALLAM, which we truncate to eight weights. This is the most we
5143 can get in regular DUCET.
5144
5145 In addition, collations with reorderings can add an extra weight per weight,
5146 which currently only happens on the primary level. We simulate this by
5147 simply adding an extra level.
5148
5149 One could conceivably have tailorings yielding expansions having more than
5150 this, but we don't currently, and mostly, tailorings are about contractions
5151 and adding single weights anyway.
5152
5153 We also need to add room for one level separator between each level.
5154 */
5155 // We really ought to have len % 4 == 0, but not all calling code conforms.
5156 1303790 const size_t num_codepoints = (len + 3) / 4;
5157 1303790 const size_t max_num_weights_per_level = num_codepoints * 8;
5158 1303790 size_t max_num_weights = max_num_weights_per_level * cs->levels_for_compare;
5159
4/4
✓ Branch 0 taken 798 times.
✓ Branch 1 taken 1302992 times.
✓ Branch 2 taken 638 times.
✓ Branch 3 taken 160 times.
1303790 if (cs->coll_param && cs->coll_param->reorder_param) {
5160 638 max_num_weights += max_num_weights_per_level;
5161 }
5162 1303790 return (max_num_weights + (cs->levels_for_compare - 1)) * sizeof(uint16_t);
5163 }
5164
5165 } // extern "C"
5166
5167 /*
5168 UCS2 optimized CHARSET_INFO compatible wrappers.
5169 */
5170 extern "C" {
5171 12 static int my_strnncoll_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5172 size_t slen, const uchar *t, size_t tlen,
5173 bool t_is_prefix) {
5174 12 Mb_wc_through_function_pointer mb_wc(cs);
5175
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>(
5176 24 cs, mb_wc, s, slen, t, tlen, t_is_prefix);
5177 }
5178
5179 12139 static int my_strnncollsp_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5180 size_t slen, const uchar *t, size_t tlen) {
5181 12139 Mb_wc_through_function_pointer mb_wc(cs);
5182
1/2
✓ Branch 0 taken 12139 times.
✗ Branch 1 not taken.
24278 return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen);
5183 }
5184
5185 460 static void my_hash_sort_ucs2_uca(const CHARSET_INFO *cs, const uchar *s,
5186 size_t slen, uint64 *n1, uint64 *n2) {
5187 460 Mb_wc_through_function_pointer mb_wc(cs);
5188
1/2
✓ Branch 0 taken 460 times.
✗ Branch 1 not taken.
460 my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2);
5189 460 }
5190
5191 4869686 static size_t my_strnxfrm_ucs2_uca(const CHARSET_INFO *cs, uchar *dst,
5192 size_t dstlen, uint num_codepoints,
5193 const uchar *src, size_t srclen,
5194 uint flags) {
5195 4869686 Mb_wc_through_function_pointer mb_wc(cs);
5196
1/2
✓ Branch 0 taken 4869686 times.
✗ Branch 1 not taken.
4869686 return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen,
5197 9739372 flags);
5198 }
5199 } // extern "C"
5200
5201 MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = {
5202 my_coll_init_uca, /* init */
5203 my_coll_uninit_uca,
5204 my_strnncoll_ucs2_uca,
5205 my_strnncollsp_ucs2_uca,
5206 my_strnxfrm_ucs2_uca,
5207 my_strnxfrmlen_simple,
5208 my_like_range_generic,
5209 my_wildcmp_uca,
5210 nullptr,
5211 my_instr_mb,
5212 my_hash_sort_ucs2_uca,
5213 my_propagate_complex};
5214
5215 CHARSET_INFO my_charset_ucs2_unicode_ci = {
5216 128,
5217 0,
5218 0, /* number */
5219 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5220 "ucs2", /* cs name */
5221 "ucs2_unicode_ci", /* m_coll_name */
5222 "UCS-2 Unicode", /* comment */
5223 "", /* tailoring */
5224 nullptr, /* coll_param */
5225 nullptr, /* ctype */
5226 nullptr, /* to_lower */
5227 nullptr, /* to_upper */
5228 nullptr, /* sort_order */
5229 nullptr, /* uca */
5230 nullptr, /* tab_to_uni */
5231 nullptr, /* tab_from_uni */
5232 &my_unicase_default, /* caseinfo */
5233 nullptr, /* state_map */
5234 nullptr, /* ident_map */
5235 8, /* strxfrm_multiply */
5236 1, /* caseup_multiply */
5237 1, /* casedn_multiply */
5238 2, /* mbminlen */
5239 2, /* mbmaxlen */
5240 1, /* mbmaxlenlen */
5241 9, /* min_sort_char */
5242 0xFFFF, /* max_sort_char */
5243 ' ', /* pad char */
5244 false, /* escape_with_backslash_is_dangerous */
5245 1, /* levels_for_compare */
5246 &my_charset_ucs2_handler,
5247 &my_collation_ucs2_uca_handler,
5248 PAD_SPACE};
5249
5250 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci = {
5251 129,
5252 0,
5253 0, /* number */
5254 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5255 "ucs2", /* cs name */
5256 "ucs2_icelandic_ci", /* m_coll_name */
5257 "UCS-2 Unicode", /* comment */
5258 icelandic, /* tailoring */
5259 nullptr, /* coll_param */
5260 nullptr, /* ctype */
5261 nullptr, /* to_lower */
5262 nullptr, /* to_upper */
5263 nullptr, /* sort_order */
5264 nullptr, /* uca */
5265 nullptr, /* tab_to_uni */
5266 nullptr, /* tab_from_uni */
5267 &my_unicase_default, /* caseinfo */
5268 nullptr, /* state_map */
5269 nullptr, /* ident_map */
5270 8, /* strxfrm_multiply */
5271 1, /* caseup_multiply */
5272 1, /* casedn_multiply */
5273 2, /* mbminlen */
5274 2, /* mbmaxlen */
5275 1, /* mbmaxlenlen */
5276 9, /* min_sort_char */
5277 0xFFFF, /* max_sort_char */
5278 ' ', /* pad char */
5279 false, /* escape_with_backslash_is_dangerous */
5280 1, /* levels_for_compare */
5281 &my_charset_ucs2_handler,
5282 &my_collation_ucs2_uca_handler,
5283 PAD_SPACE};
5284
5285 CHARSET_INFO my_charset_ucs2_latvian_uca_ci = {
5286 130,
5287 0,
5288 0, /* number */
5289 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5290 "ucs2", /* cs name */
5291 "ucs2_latvian_ci", /* m_coll_name */
5292 "UCS-2 Unicode", /* comment */
5293 latvian, /* tailoring */
5294 nullptr, /* coll_param */
5295 nullptr, /* ctype */
5296 nullptr, /* to_lower */
5297 nullptr, /* to_upper */
5298 nullptr, /* sort_order */
5299 nullptr, /* uca */
5300 nullptr, /* tab_to_uni */
5301 nullptr, /* tab_from_uni */
5302 &my_unicase_default, /* caseinfo */
5303 nullptr, /* state_map */
5304 nullptr, /* ident_map */
5305 8, /* strxfrm_multiply */
5306 1, /* caseup_multiply */
5307 1, /* casedn_multiply */
5308 2, /* mbminlen */
5309 2, /* mbmaxlen */
5310 1, /* mbmaxlenlen */
5311 9, /* min_sort_char */
5312 0xFFFF, /* max_sort_char */
5313 ' ', /* pad char */
5314 false, /* escape_with_backslash_is_dangerous */
5315 1, /* levels_for_compare */
5316 &my_charset_ucs2_handler,
5317 &my_collation_ucs2_uca_handler,
5318 PAD_SPACE};
5319
5320 CHARSET_INFO my_charset_ucs2_romanian_uca_ci = {
5321 131,
5322 0,
5323 0, /* number */
5324 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5325 "ucs2", /* cs name */
5326 "ucs2_romanian_ci", /* m_coll_name */
5327 "UCS-2 Unicode", /* comment */
5328 romanian, /* tailoring */
5329 nullptr, /* coll_param */
5330 nullptr, /* ctype */
5331 nullptr, /* to_lower */
5332 nullptr, /* to_upper */
5333 nullptr, /* sort_order */
5334 nullptr, /* uca */
5335 nullptr, /* tab_to_uni */
5336 nullptr, /* tab_from_uni */
5337 &my_unicase_default, /* caseinfo */
5338 nullptr, /* state_map */
5339 nullptr, /* ident_map */
5340 8, /* strxfrm_multiply */
5341 1, /* caseup_multiply */
5342 1, /* casedn_multiply */
5343 2, /* mbminlen */
5344 2, /* mbmaxlen */
5345 1, /* mbmaxlenlen */
5346 9, /* min_sort_char */
5347 0xFFFF, /* max_sort_char */
5348 ' ', /* pad char */
5349 false, /* escape_with_backslash_is_dangerous */
5350 1, /* levels_for_compare */
5351 &my_charset_ucs2_handler,
5352 &my_collation_ucs2_uca_handler,
5353 PAD_SPACE};
5354
5355 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci = {
5356 132,
5357 0,
5358 0, /* number */
5359 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5360 "ucs2", /* cs name */
5361 "ucs2_slovenian_ci", /* m_coll_name */
5362 "UCS-2 Unicode", /* comment */
5363 slovenian, /* tailoring */
5364 nullptr, /* coll_param */
5365 nullptr, /* ctype */
5366 nullptr, /* to_lower */
5367 nullptr, /* to_upper */
5368 nullptr, /* sort_order */
5369 nullptr, /* uca */
5370 nullptr, /* tab_to_uni */
5371 nullptr, /* tab_from_uni */
5372 &my_unicase_default, /* caseinfo */
5373 nullptr, /* state_map */
5374 nullptr, /* ident_map */
5375 8, /* strxfrm_multiply */
5376 1, /* caseup_multiply */
5377 1, /* casedn_multiply */
5378 2, /* mbminlen */
5379 2, /* mbmaxlen */
5380 1, /* mbmaxlenlen */
5381 9, /* min_sort_char */
5382 0xFFFF, /* max_sort_char */
5383 ' ', /* pad char */
5384 false, /* escape_with_backslash_is_dangerous */
5385 1, /* levels_for_compare */
5386 &my_charset_ucs2_handler,
5387 &my_collation_ucs2_uca_handler,
5388 PAD_SPACE};
5389
5390 CHARSET_INFO my_charset_ucs2_polish_uca_ci = {
5391 133,
5392 0,
5393 0, /* number */
5394 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5395 "ucs2", /* cs name */
5396 "ucs2_polish_ci", /* m_coll_name */
5397 "UCS-2 Unicode", /* comment */
5398 polish, /* tailoring */
5399 nullptr, /* coll_param */
5400 nullptr, /* ctype */
5401 nullptr, /* to_lower */
5402 nullptr, /* to_upper */
5403 nullptr, /* sort_order */
5404 nullptr, /* uca */
5405 nullptr, /* tab_to_uni */
5406 nullptr, /* tab_from_uni */
5407 &my_unicase_default, /* caseinfo */
5408 nullptr, /* state_map */
5409 nullptr, /* ident_map */
5410 8, /* strxfrm_multiply */
5411 1, /* caseup_multiply */
5412 1, /* casedn_multiply */
5413 2, /* mbminlen */
5414 2, /* mbmaxlen */
5415 1, /* mbmaxlenlen */
5416 9, /* min_sort_char */
5417 0xFFFF, /* max_sort_char */
5418 ' ', /* pad char */
5419 false, /* escape_with_backslash_is_dangerous */
5420 1, /* levels_for_compare */
5421 &my_charset_ucs2_handler,
5422 &my_collation_ucs2_uca_handler,
5423 PAD_SPACE};
5424
5425 CHARSET_INFO my_charset_ucs2_estonian_uca_ci = {
5426 134,
5427 0,
5428 0, /* number */
5429 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5430 "ucs2", /* cs name */
5431 "ucs2_estonian_ci", /* m_coll_name */
5432 "UCS-2 Unicode", /* comment */
5433 estonian, /* tailoring */
5434 nullptr, /* coll_param */
5435 nullptr, /* ctype */
5436 nullptr, /* to_lower */
5437 nullptr, /* to_upper */
5438 nullptr, /* sort_order */
5439 nullptr, /* uca */
5440 nullptr, /* tab_to_uni */
5441 nullptr, /* tab_from_uni */
5442 &my_unicase_default, /* caseinfo */
5443 nullptr, /* state_map */
5444 nullptr, /* ident_map */
5445 8, /* strxfrm_multiply */
5446 1, /* caseup_multiply */
5447 1, /* casedn_multiply */
5448 2, /* mbminlen */
5449 2, /* mbmaxlen */
5450 1, /* mbmaxlenlen */
5451 9, /* min_sort_char */
5452 0xFFFF, /* max_sort_char */
5453 ' ', /* pad char */
5454 false, /* escape_with_backslash_is_dangerous */
5455 1, /* levels_for_compare */
5456 &my_charset_ucs2_handler,
5457 &my_collation_ucs2_uca_handler,
5458 PAD_SPACE};
5459
5460 CHARSET_INFO my_charset_ucs2_spanish_uca_ci = {
5461 135,
5462 0,
5463 0, /* number */
5464 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5465 "ucs2", /* cs name */
5466 "ucs2_spanish_ci", /* m_coll_name */
5467 "UCS-2 Unicode", /* comment */
5468 spanish, /* tailoring */
5469 nullptr, /* coll_param */
5470 nullptr, /* ctype */
5471 nullptr, /* to_lower */
5472 nullptr, /* to_upper */
5473 nullptr, /* sort_order */
5474 nullptr, /* uca */
5475 nullptr, /* tab_to_uni */
5476 nullptr, /* tab_from_uni */
5477 &my_unicase_default, /* caseinfo */
5478 nullptr, /* state_map */
5479 nullptr, /* ident_map */
5480 8, /* strxfrm_multiply */
5481 1, /* caseup_multiply */
5482 1, /* casedn_multiply */
5483 2, /* mbminlen */
5484 2, /* mbmaxlen */
5485 1, /* mbmaxlenlen */
5486 9, /* min_sort_char */
5487 0xFFFF, /* max_sort_char */
5488 ' ', /* pad char */
5489 false, /* escape_with_backslash_is_dangerous */
5490 1, /* levels_for_compare */
5491 &my_charset_ucs2_handler,
5492 &my_collation_ucs2_uca_handler,
5493 PAD_SPACE};
5494
5495 CHARSET_INFO my_charset_ucs2_swedish_uca_ci = {
5496 136,
5497 0,
5498 0, /* number */
5499 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5500 "ucs2", /* cs name */
5501 "ucs2_swedish_ci", /* m_coll_name */
5502 "UCS-2 Unicode", /* comment */
5503 swedish, /* tailoring */
5504 nullptr, /* coll_param */
5505 nullptr, /* ctype */
5506 nullptr, /* to_lower */
5507 nullptr, /* to_upper */
5508 nullptr, /* sort_order */
5509 nullptr, /* uca */
5510 nullptr, /* tab_to_uni */
5511 nullptr, /* tab_from_uni */
5512 &my_unicase_default, /* caseinfo */
5513 nullptr, /* state_map */
5514 nullptr, /* ident_map */
5515 8, /* strxfrm_multiply */
5516 1, /* caseup_multiply */
5517 1, /* casedn_multiply */
5518 2, /* mbminlen */
5519 2, /* mbmaxlen */
5520 1, /* mbmaxlenlen */
5521 9, /* min_sort_char */
5522 0xFFFF, /* max_sort_char */
5523 ' ', /* pad char */
5524 false, /* escape_with_backslash_is_dangerous */
5525 1, /* levels_for_compare */
5526 &my_charset_ucs2_handler,
5527 &my_collation_ucs2_uca_handler,
5528 PAD_SPACE};
5529
5530 CHARSET_INFO my_charset_ucs2_turkish_uca_ci = {
5531 137,
5532 0,
5533 0, /* number */
5534 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5535 "ucs2", /* cs name */
5536 "ucs2_turkish_ci", /* m_coll_name */
5537 "UCS-2 Unicode", /* comment */
5538 turkish, /* tailoring */
5539 nullptr, /* coll_param */
5540 nullptr, /* ctype */
5541 nullptr, /* to_lower */
5542 nullptr, /* to_upper */
5543 nullptr, /* sort_order */
5544 nullptr, /* uca */
5545 nullptr, /* tab_to_uni */
5546 nullptr, /* tab_from_uni */
5547 &my_unicase_turkish, /* caseinfo */
5548 nullptr, /* state_map */
5549 nullptr, /* ident_map */
5550 8, /* strxfrm_multiply */
5551 1, /* caseup_multiply */
5552 1, /* casedn_multiply */
5553 2, /* mbminlen */
5554 2, /* mbmaxlen */
5555 1, /* mbmaxlenlen */
5556 9, /* min_sort_char */
5557 0xFFFF, /* max_sort_char */
5558 ' ', /* pad char */
5559 false, /* escape_with_backslash_is_dangerous */
5560 1, /* levels_for_compare */
5561 &my_charset_ucs2_handler,
5562 &my_collation_ucs2_uca_handler,
5563 PAD_SPACE};
5564
5565 CHARSET_INFO my_charset_ucs2_czech_uca_ci = {
5566 138,
5567 0,
5568 0, /* number */
5569 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5570 "ucs2", /* cs name */
5571 "ucs2_czech_ci", /* m_coll_name */
5572 "UCS-2 Unicode", /* comment */
5573 czech, /* tailoring */
5574 nullptr, /* coll_param */
5575 nullptr, /* ctype */
5576 nullptr, /* to_lower */
5577 nullptr, /* to_upper */
5578 nullptr, /* sort_order */
5579 nullptr, /* uca */
5580 nullptr, /* tab_to_uni */
5581 nullptr, /* tab_from_uni */
5582 &my_unicase_default, /* caseinfo */
5583 nullptr, /* state_map */
5584 nullptr, /* ident_map */
5585 8, /* strxfrm_multiply */
5586 1, /* caseup_multiply */
5587 1, /* casedn_multiply */
5588 2, /* mbminlen */
5589 2, /* mbmaxlen */
5590 1, /* mbmaxlenlen */
5591 9, /* min_sort_char */
5592 0xFFFF, /* max_sort_char */
5593 ' ', /* pad char */
5594 false, /* escape_with_backslash_is_dangerous */
5595 1, /* levels_for_compare */
5596 &my_charset_ucs2_handler,
5597 &my_collation_ucs2_uca_handler,
5598 PAD_SPACE};
5599
5600 CHARSET_INFO my_charset_ucs2_danish_uca_ci = {
5601 139,
5602 0,
5603 0, /* number */
5604 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5605 "ucs2", /* cs name */
5606 "ucs2_danish_ci", /* m_coll_name */
5607 "UCS-2 Unicode", /* comment */
5608 danish, /* tailoring */
5609 nullptr, /* coll_param */
5610 nullptr, /* ctype */
5611 nullptr, /* to_lower */
5612 nullptr, /* to_upper */
5613 nullptr, /* sort_order */
5614 nullptr, /* uca */
5615 nullptr, /* tab_to_uni */
5616 nullptr, /* tab_from_uni */
5617 &my_unicase_default, /* caseinfo */
5618 nullptr, /* state_map */
5619 nullptr, /* ident_map */
5620 8, /* strxfrm_multiply */
5621 1, /* caseup_multiply */
5622 1, /* casedn_multiply */
5623 2, /* mbminlen */
5624 2, /* mbmaxlen */
5625 1, /* mbmaxlenlen */
5626 9, /* min_sort_char */
5627 0xFFFF, /* max_sort_char */
5628 ' ', /* pad char */
5629 false, /* escape_with_backslash_is_dangerous */
5630 1, /* levels_for_compare */
5631 &my_charset_ucs2_handler,
5632 &my_collation_ucs2_uca_handler,
5633 PAD_SPACE};
5634
5635 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci = {
5636 140,
5637 0,
5638 0, /* number */
5639 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5640 "ucs2", /* cs name */
5641 "ucs2_lithuanian_ci", /* m_coll_name */
5642 "UCS-2 Unicode", /* comment */
5643 lithuanian, /* tailoring */
5644 nullptr, /* coll_param */
5645 nullptr, /* ctype */
5646 nullptr, /* to_lower */
5647 nullptr, /* to_upper */
5648 nullptr, /* sort_order */
5649 nullptr, /* uca */
5650 nullptr, /* tab_to_uni */
5651 nullptr, /* tab_from_uni */
5652 &my_unicase_default, /* caseinfo */
5653 nullptr, /* state_map */
5654 nullptr, /* ident_map */
5655 8, /* strxfrm_multiply */
5656 1, /* caseup_multiply */
5657 1, /* casedn_multiply */
5658 2, /* mbminlen */
5659 2, /* mbmaxlen */
5660 1, /* mbmaxlenlen */
5661 9, /* min_sort_char */
5662 0xFFFF, /* max_sort_char */
5663 ' ', /* pad char */
5664 false, /* escape_with_backslash_is_dangerous */
5665 1, /* levels_for_compare */
5666 &my_charset_ucs2_handler,
5667 &my_collation_ucs2_uca_handler,
5668 PAD_SPACE};
5669
5670 CHARSET_INFO my_charset_ucs2_slovak_uca_ci = {
5671 141,
5672 0,
5673 0, /* number */
5674 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5675 "ucs2", /* cs name */
5676 "ucs2_slovak_ci", /* m_coll_name */
5677 "UCS-2 Unicode", /* comment */
5678 slovak, /* tailoring */
5679 nullptr, /* coll_param */
5680 nullptr, /* ctype */
5681 nullptr, /* to_lower */
5682 nullptr, /* to_upper */
5683 nullptr, /* sort_order */
5684 nullptr, /* uca */
5685 nullptr, /* tab_to_uni */
5686 nullptr, /* tab_from_uni */
5687 &my_unicase_default, /* caseinfo */
5688 nullptr, /* state_map */
5689 nullptr, /* ident_map */
5690 8, /* strxfrm_multiply */
5691 1, /* caseup_multiply */
5692 1, /* casedn_multiply */
5693 2, /* mbminlen */
5694 2, /* mbmaxlen */
5695 1, /* mbmaxlenlen */
5696 9, /* min_sort_char */
5697 0xFFFF, /* max_sort_char */
5698 ' ', /* pad char */
5699 false, /* escape_with_backslash_is_dangerous */
5700 1, /* levels_for_compare */
5701 &my_charset_ucs2_handler,
5702 &my_collation_ucs2_uca_handler,
5703 PAD_SPACE};
5704
5705 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci = {
5706 142,
5707 0,
5708 0, /* number */
5709 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5710 "ucs2", /* cs name */
5711 "ucs2_spanish2_ci", /* m_coll_name */
5712 "UCS-2 Unicode", /* comment */
5713 spanish2, /* tailoring */
5714 nullptr, /* coll_param */
5715 nullptr, /* ctype */
5716 nullptr, /* to_lower */
5717 nullptr, /* to_upper */
5718 nullptr, /* sort_order */
5719 nullptr, /* uca */
5720 nullptr, /* tab_to_uni */
5721 nullptr, /* tab_from_uni */
5722 &my_unicase_default, /* caseinfo */
5723 nullptr, /* state_map */
5724 nullptr, /* ident_map */
5725 8, /* strxfrm_multiply */
5726 1, /* caseup_multiply */
5727 1, /* casedn_multiply */
5728 2, /* mbminlen */
5729 2, /* mbmaxlen */
5730 1, /* mbmaxlenlen */
5731 9, /* min_sort_char */
5732 0xFFFF, /* max_sort_char */
5733 ' ', /* pad char */
5734 false, /* escape_with_backslash_is_dangerous */
5735 1, /* levels_for_compare */
5736 &my_charset_ucs2_handler,
5737 &my_collation_ucs2_uca_handler,
5738 PAD_SPACE};
5739
5740 CHARSET_INFO my_charset_ucs2_roman_uca_ci = {
5741 143,
5742 0,
5743 0, /* number */
5744 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5745 "ucs2", /* cs name */
5746 "ucs2_roman_ci", /* m_coll_name */
5747 "UCS-2 Unicode", /* comment */
5748 roman, /* tailoring */
5749 nullptr, /* coll_param */
5750 nullptr, /* ctype */
5751 nullptr, /* to_lower */
5752 nullptr, /* to_upper */
5753 nullptr, /* sort_order */
5754 nullptr, /* uca */
5755 nullptr, /* tab_to_uni */
5756 nullptr, /* tab_from_uni */
5757 &my_unicase_default, /* caseinfo */
5758 nullptr, /* state_map */
5759 nullptr, /* ident_map */
5760 8, /* strxfrm_multiply */
5761 1, /* caseup_multiply */
5762 1, /* casedn_multiply */
5763 2, /* mbminlen */
5764 2, /* mbmaxlen */
5765 1, /* mbmaxlenlen */
5766 9, /* min_sort_char */
5767 0xFFFF, /* max_sort_char */
5768 ' ', /* pad char */
5769 false, /* escape_with_backslash_is_dangerous */
5770 1, /* levels_for_compare */
5771 &my_charset_ucs2_handler,
5772 &my_collation_ucs2_uca_handler,
5773 PAD_SPACE};
5774
5775 CHARSET_INFO my_charset_ucs2_persian_uca_ci = {
5776 144,
5777 0,
5778 0, /* number */
5779 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5780 "ucs2", /* cs name */
5781 "ucs2_persian_ci", /* m_coll_name */
5782 "UCS-2 Unicode", /* comment */
5783 persian, /* tailoring */
5784 nullptr, /* coll_param */
5785 nullptr, /* ctype */
5786 nullptr, /* to_lower */
5787 nullptr, /* to_upper */
5788 nullptr, /* sort_order */
5789 nullptr, /* uca */
5790 nullptr, /* tab_to_uni */
5791 nullptr, /* tab_from_uni */
5792 &my_unicase_default, /* caseinfo */
5793 nullptr, /* state_map */
5794 nullptr, /* ident_map */
5795 8, /* strxfrm_multiply */
5796 1, /* caseup_multiply */
5797 1, /* casedn_multiply */
5798 2, /* mbminlen */
5799 2, /* mbmaxlen */
5800 1, /* mbmaxlenlen */
5801 9, /* min_sort_char */
5802 0xFFFF, /* max_sort_char */
5803 ' ', /* pad char */
5804 false, /* escape_with_backslash_is_dangerous */
5805 1, /* levels_for_compare */
5806 &my_charset_ucs2_handler,
5807 &my_collation_ucs2_uca_handler,
5808 PAD_SPACE};
5809
5810 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci = {
5811 145,
5812 0,
5813 0, /* number */
5814 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5815 "ucs2", /* cs name */
5816 "ucs2_esperanto_ci", /* m_coll_name */
5817 "UCS-2 Unicode", /* comment */
5818 esperanto, /* tailoring */
5819 nullptr, /* coll_param */
5820 nullptr, /* ctype */
5821 nullptr, /* to_lower */
5822 nullptr, /* to_upper */
5823 nullptr, /* sort_order */
5824 nullptr, /* uca */
5825 nullptr, /* tab_to_uni */
5826 nullptr, /* tab_from_uni */
5827 &my_unicase_default, /* caseinfo */
5828 nullptr, /* state_map */
5829 nullptr, /* ident_map */
5830 8, /* strxfrm_multiply */
5831 1, /* caseup_multiply */
5832 1, /* casedn_multiply */
5833 2, /* mbminlen */
5834 2, /* mbmaxlen */
5835 1, /* mbmaxlenlen */
5836 9, /* min_sort_char */
5837 0xFFFF, /* max_sort_char */
5838 ' ', /* pad char */
5839 false, /* escape_with_backslash_is_dangerous */
5840 1, /* levels_for_compare */
5841 &my_charset_ucs2_handler,
5842 &my_collation_ucs2_uca_handler,
5843 PAD_SPACE};
5844
5845 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci = {
5846 146,
5847 0,
5848 0, /* number */
5849 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5850 "ucs2", /* cs name */
5851 "ucs2_hungarian_ci", /* m_coll_name */
5852 "UCS-2 Unicode", /* comment */
5853 hungarian, /* tailoring */
5854 nullptr, /* coll_param */
5855 nullptr, /* ctype */
5856 nullptr, /* to_lower */
5857 nullptr, /* to_upper */
5858 nullptr, /* sort_order */
5859 nullptr, /* uca */
5860 nullptr, /* tab_to_uni */
5861 nullptr, /* tab_from_uni */
5862 &my_unicase_default, /* caseinfo */
5863 nullptr, /* state_map */
5864 nullptr, /* ident_map */
5865 8, /* strxfrm_multiply */
5866 1, /* caseup_multiply */
5867 1, /* casedn_multiply */
5868 2, /* mbminlen */
5869 2, /* mbmaxlen */
5870 1, /* mbmaxlenlen */
5871 9, /* min_sort_char */
5872 0xFFFF, /* max_sort_char */
5873 ' ', /* pad char */
5874 false, /* escape_with_backslash_is_dangerous */
5875 1, /* levels_for_compare */
5876 &my_charset_ucs2_handler,
5877 &my_collation_ucs2_uca_handler,
5878 PAD_SPACE};
5879
5880 CHARSET_INFO my_charset_ucs2_sinhala_uca_ci = {
5881 147,
5882 0,
5883 0, /* number */
5884 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5885 "ucs2", /* csname */
5886 "ucs2_sinhala_ci", /* m_coll_name */
5887 "UCS-2 Unicode", /* comment */
5888 sinhala, /* tailoring */
5889 nullptr, /* coll_param */
5890 nullptr, /* ctype */
5891 nullptr, /* to_lower */
5892 nullptr, /* to_upper */
5893 nullptr, /* sort_order */
5894 nullptr, /* uca */
5895 nullptr, /* tab_to_uni */
5896 nullptr, /* tab_from_uni */
5897 &my_unicase_default, /* caseinfo */
5898 nullptr, /* state_map */
5899 nullptr, /* ident_map */
5900 8, /* strxfrm_multiply */
5901 1, /* caseup_multiply */
5902 1, /* casedn_multiply */
5903 2, /* mbminlen */
5904 2, /* mbmaxlen */
5905 1, /* mbmaxlenlen */
5906 9, /* min_sort_char */
5907 0xFFFF, /* max_sort_char */
5908 ' ', /* pad char */
5909 false, /* escape_with_backslash_is_dangerous */
5910 1, /* levels_for_compare */
5911 &my_charset_ucs2_handler,
5912 &my_collation_ucs2_uca_handler,
5913 PAD_SPACE};
5914
5915 CHARSET_INFO my_charset_ucs2_german2_uca_ci = {
5916 148,
5917 0,
5918 0, /* number */
5919 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5920 "ucs2", /* csname */
5921 "ucs2_german2_ci", /* m_coll_name */
5922 "UCS-2 Unicode", /* comment */
5923 german2, /* tailoring */
5924 nullptr, /* coll_param */
5925 nullptr, /* ctype */
5926 nullptr, /* to_lower */
5927 nullptr, /* to_upper */
5928 nullptr, /* sort_order */
5929 nullptr, /* uca */
5930 nullptr, /* tab_to_uni */
5931 nullptr, /* tab_from_uni */
5932 &my_unicase_default, /* caseinfo */
5933 nullptr, /* state_map */
5934 nullptr, /* ident_map */
5935 8, /* strxfrm_multiply */
5936 1, /* caseup_multiply */
5937 1, /* casedn_multiply */
5938 2, /* mbminlen */
5939 2, /* mbmaxlen */
5940 1, /* mbmaxlenlen */
5941 9, /* min_sort_char */
5942 0xFFFF, /* max_sort_char */
5943 ' ', /* pad char */
5944 false, /* escape_with_backslash_is_dangerous */
5945 1, /* levels_for_compare */
5946 &my_charset_ucs2_handler,
5947 &my_collation_ucs2_uca_handler,
5948 PAD_SPACE};
5949
5950 CHARSET_INFO my_charset_ucs2_croatian_uca_ci = {
5951 149,
5952 0,
5953 0, /* number */
5954 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5955 "ucs2", /* csname */
5956 "ucs2_croatian_ci", /* m_coll_name */
5957 "UCS-2 Unicode", /* comment */
5958 croatian, /* tailoring */
5959 nullptr, /* coll_param */
5960 nullptr, /* ctype */
5961 nullptr, /* to_lower */
5962 nullptr, /* to_upper */
5963 nullptr, /* sort_order */
5964 nullptr, /* uca */
5965 nullptr, /* tab_to_uni */
5966 nullptr, /* tab_from_uni */
5967 &my_unicase_default, /* caseinfo */
5968 nullptr, /* state_map */
5969 nullptr, /* ident_map */
5970 8, /* strxfrm_multiply */
5971 1, /* caseup_multiply */
5972 1, /* casedn_multiply */
5973 2, /* mbminlen */
5974 2, /* mbmaxlen */
5975 1, /* mbmaxlenlen */
5976 9, /* min_sort_char */
5977 0xFFFF, /* max_sort_char */
5978 ' ', /* pad char */
5979 false, /* escape_with_backslash_is_dangerous */
5980 1, /* levels_for_compare */
5981 &my_charset_ucs2_handler,
5982 &my_collation_ucs2_uca_handler,
5983 PAD_SPACE};
5984
5985 CHARSET_INFO my_charset_ucs2_unicode_520_ci = {
5986 150,
5987 0,
5988 0, /* number */
5989 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
5990 "ucs2", /* cs name */
5991 "ucs2_unicode_520_ci", /* m_coll_name */
5992 "UCS-2 Unicode", /* comment */
5993 "", /* tailoring */
5994 nullptr, /* coll_param */
5995 nullptr, /* ctype */
5996 nullptr, /* to_lower */
5997 nullptr, /* to_upper */
5998 nullptr, /* sort_order */
5999 &my_uca_v520, /* uca */
6000 nullptr, /* tab_to_uni */
6001 nullptr, /* tab_from_uni */
6002 &my_unicase_unicode520, /* caseinfo */
6003 nullptr, /* state_map */
6004 nullptr, /* ident_map */
6005 8, /* strxfrm_multiply */
6006 1, /* caseup_multiply */
6007 1, /* casedn_multiply */
6008 2, /* mbminlen */
6009 2, /* mbmaxlen */
6010 1, /* mbmaxlenlen */
6011 9, /* min_sort_char */
6012 0xFFFF, /* max_sort_char */
6013 ' ', /* pad char */
6014 false, /* escape_with_backslash_is_dangerous */
6015 1, /* levels_for_compare */
6016 &my_charset_ucs2_handler,
6017 &my_collation_ucs2_uca_handler,
6018 PAD_SPACE};
6019
6020 CHARSET_INFO my_charset_ucs2_vietnamese_ci = {
6021 151,
6022 0,
6023 0, /* number */
6024 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII,
6025 "ucs2", /* csname */
6026 "ucs2_vietnamese_ci", /* m_coll_name */
6027 "UCS-2 Unicode", /* comment */
6028 vietnamese, /* tailoring */
6029 nullptr, /* coll_param */
6030 nullptr, /* ctype */
6031 nullptr, /* to_lower */
6032 nullptr, /* to_upper */
6033 nullptr, /* sort_order */
6034 nullptr, /* uca */
6035 nullptr, /* tab_to_uni */
6036 nullptr, /* tab_from_uni */
6037 &my_unicase_default, /* caseinfo */
6038 nullptr, /* state_map */
6039 nullptr, /* ident_map */
6040 8, /* strxfrm_multiply */
6041 1, /* caseup_multiply */
6042 1, /* casedn_multiply */
6043 2, /* mbminlen */
6044 2, /* mbmaxlen */
6045 1, /* mbmaxlenlen */
6046 9, /* min_sort_char */
6047 0xFFFF, /* max_sort_char */
6048 ' ', /* pad char */
6049 false, /* escape_with_backslash_is_dangerous */
6050 1, /* levels_for_compare */
6051 &my_charset_ucs2_handler,
6052 &my_collation_ucs2_uca_handler,
6053 PAD_SPACE};
6054
/*
  Collation handler table that the utf8mb3 and utf8mb4 CHARSET_INFO
  definitions in this file point at. The entries are positional; only the
  first slot is labelled in the original, so the remaining slot names must be
  read from the MY_COLLATION_HANDLER declaration.
*/
6055 MY_COLLATION_HANDLER my_collation_any_uca_handler = {
6056 my_coll_init_uca, /* init */
6057 my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca,
6058 my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_mb,
6059 my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb,
6060 my_hash_sort_any_uca, my_propagate_complex};
6061
/*
  Collation handler table wired to the *_uca_900 function family. It reuses
  my_coll_init_uca, my_coll_uninit_uca, my_like_range_mb, my_wildcmp_uca,
  my_strcasecmp_uca and my_instr_mb from my_collation_any_uca_handler and
  installs my_strnncoll_uca_900, my_strnncollsp_uca_900, my_strnxfrm_uca_900,
  my_strnxfrmlen_uca_900, my_hash_sort_uca_900 and my_propagate_uca_900 in
  the remaining slots.
*/
6062 MY_COLLATION_HANDLER my_collation_uca_900_handler = {
6063 my_coll_init_uca, /* init */
6064 my_coll_uninit_uca, my_strnncoll_uca_900, my_strnncollsp_uca_900,
6065 my_strnxfrm_uca_900, my_strnxfrmlen_uca_900, my_like_range_mb,
6066 my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb,
6067 my_hash_sort_uca_900, my_propagate_uca_900};
6068
6069 /*
6070 Character-type table used as the ctype member of the UTF-8 collations
6071 in this file. Bytes with a code above 127 are treated as letters, which
6072 guarantees that word boundaries work correctly with regular expressions.
6073 Byte 255 is not marked as a letter because it is an illegal byte in UTF-8.
   The table has 257 entries: a leading 0 followed by one entry per byte
   value (presumably looked up as ctype[byte + 1] - confirm against the
   character-class macros that read it).
6074 */
6075 static const uchar ctype_utf8[] = {
6076 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32,
6077 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
6078 32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
6079 16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16,
6080 16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1,
6081 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16,
6082 16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2,
6083 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16,
6084 32, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* from here on: the "letter" value 3 for the high bytes */
6085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6086 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6087 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6088 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6089 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6090 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6091 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6092 0}; /* last entry (byte 255) deliberately left unmarked */
6093
6094 extern MY_CHARSET_HANDLER my_charset_utf8_handler; /* only declared here; the utf8mb3 collations below use it as their charset handler */
6095
/* State flag combination passed by every utf8mb3 UCA collation defined below. */
6096 #define MY_CS_UTF8MB3_UCA_FLAGS \
6097 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE)
6098
/* utf8mb3_unicode_ci (number 192): compiled-in utf8mb3 collation with an
   empty tailoring, served by my_collation_any_uca_handler. */
6099 CHARSET_INFO my_charset_utf8_unicode_ci = {
6100 192, /* number */
6101 0, /* primary_number */
6102 0, /* binary_number */
6103 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6104 "utf8mb3", /* csname */
6105 "utf8mb3_unicode_ci", /* m_coll_name */
6106 "UTF-8 Unicode", /* comment: this is a UTF-8 collation, matching its utf8mb3 siblings */
6107 "", /* tailoring */
6108 nullptr, /* coll_param */
6109 ctype_utf8, /* ctype */
6110 nullptr, /* to_lower */
6111 nullptr, /* to_upper */
6112 nullptr, /* sort_order */
6113 nullptr, /* uca */
6114 nullptr, /* tab_to_uni */
6115 nullptr, /* tab_from_uni */
6116 &my_unicase_default, /* caseinfo */
6117 nullptr, /* state_map */
6118 nullptr, /* ident_map */
6119 8, /* strxfrm_multiply */
6120 1, /* caseup_multiply */
6121 1, /* casedn_multiply */
6122 1, /* mbminlen */
6123 3, /* mbmaxlen */
6124 1, /* mbmaxlenlen */
6125 9, /* min_sort_char */
6126 0xFFFF, /* max_sort_char */
6127 ' ', /* pad_char */
6128 false, /* escape_with_backslash_is_dangerous */
6129 1, /* levels_for_compare */
6130 &my_charset_utf8_handler, /* cset */
6131 &my_collation_any_uca_handler, /* coll */
6132 PAD_SPACE}; /* pad_attribute */
6133
/* utf8mb3_icelandic_ci (number 193): compiled-in utf8mb3 collation built from
   the `icelandic` tailoring, served by my_collation_any_uca_handler. */
6134 CHARSET_INFO my_charset_utf8_icelandic_uca_ci = {
6135 193, /* number */
6136 0, /* primary_number */
6137 0, /* binary_number */
6138 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6139 "utf8mb3", /* csname */
6140 "utf8mb3_icelandic_ci", /* m_coll_name */
6141 "UTF-8 Unicode", /* comment */
6142 icelandic, /* tailoring */
6143 nullptr, /* coll_param */
6144 ctype_utf8, /* ctype */
6145 nullptr, /* to_lower */
6146 nullptr, /* to_upper */
6147 nullptr, /* sort_order */
6148 nullptr, /* uca */
6149 nullptr, /* tab_to_uni */
6150 nullptr, /* tab_from_uni */
6151 &my_unicase_default, /* caseinfo */
6152 nullptr, /* state_map */
6153 nullptr, /* ident_map */
6154 8, /* strxfrm_multiply */
6155 1, /* caseup_multiply */
6156 1, /* casedn_multiply */
6157 1, /* mbminlen */
6158 3, /* mbmaxlen */
6159 1, /* mbmaxlenlen */
6160 9, /* min_sort_char */
6161 0xFFFF, /* max_sort_char */
6162 ' ', /* pad_char */
6163 false, /* escape_with_backslash_is_dangerous */
6164 1, /* levels_for_compare */
6165 &my_charset_utf8_handler, /* cset */
6166 &my_collation_any_uca_handler, /* coll */
6167 PAD_SPACE}; /* pad_attribute */
6168
/* utf8mb3_latvian_ci (number 194): compiled-in utf8mb3 collation built from
   the `latvian` tailoring, served by my_collation_any_uca_handler. */
6169 CHARSET_INFO my_charset_utf8_latvian_uca_ci = {
6170 194, /* number */
6171 0, /* primary_number */
6172 0, /* binary_number */
6173 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6174 "utf8mb3", /* csname */
6175 "utf8mb3_latvian_ci", /* m_coll_name */
6176 "UTF-8 Unicode", /* comment */
6177 latvian, /* tailoring */
6178 nullptr, /* coll_param */
6179 ctype_utf8, /* ctype */
6180 nullptr, /* to_lower */
6181 nullptr, /* to_upper */
6182 nullptr, /* sort_order */
6183 nullptr, /* uca */
6184 nullptr, /* tab_to_uni */
6185 nullptr, /* tab_from_uni */
6186 &my_unicase_default, /* caseinfo */
6187 nullptr, /* state_map */
6188 nullptr, /* ident_map */
6189 8, /* strxfrm_multiply */
6190 1, /* caseup_multiply */
6191 1, /* casedn_multiply */
6192 1, /* mbminlen */
6193 3, /* mbmaxlen */
6194 1, /* mbmaxlenlen */
6195 9, /* min_sort_char */
6196 0xFFFF, /* max_sort_char */
6197 ' ', /* pad_char */
6198 false, /* escape_with_backslash_is_dangerous */
6199 1, /* levels_for_compare */
6200 &my_charset_utf8_handler, /* cset */
6201 &my_collation_any_uca_handler, /* coll */
6202 PAD_SPACE}; /* pad_attribute */
6203
/* utf8mb3_romanian_ci (number 195): compiled-in utf8mb3 collation built from
   the `romanian` tailoring, served by my_collation_any_uca_handler. */
6204 CHARSET_INFO my_charset_utf8_romanian_uca_ci = {
6205 195, /* number */
6206 0, /* primary_number */
6207 0, /* binary_number */
6208 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6209 "utf8mb3", /* csname */
6210 "utf8mb3_romanian_ci", /* m_coll_name */
6211 "UTF-8 Unicode", /* comment */
6212 romanian, /* tailoring */
6213 nullptr, /* coll_param */
6214 ctype_utf8, /* ctype */
6215 nullptr, /* to_lower */
6216 nullptr, /* to_upper */
6217 nullptr, /* sort_order */
6218 nullptr, /* uca */
6219 nullptr, /* tab_to_uni */
6220 nullptr, /* tab_from_uni */
6221 &my_unicase_default, /* caseinfo */
6222 nullptr, /* state_map */
6223 nullptr, /* ident_map */
6224 8, /* strxfrm_multiply */
6225 1, /* caseup_multiply */
6226 1, /* casedn_multiply */
6227 1, /* mbminlen */
6228 3, /* mbmaxlen */
6229 1, /* mbmaxlenlen */
6230 9, /* min_sort_char */
6231 0xFFFF, /* max_sort_char */
6232 ' ', /* pad_char */
6233 false, /* escape_with_backslash_is_dangerous */
6234 1, /* levels_for_compare */
6235 &my_charset_utf8_handler, /* cset */
6236 &my_collation_any_uca_handler, /* coll */
6237 PAD_SPACE}; /* pad_attribute */
6238
/* utf8mb3_slovenian_ci (number 196): compiled-in utf8mb3 collation built from
   the `slovenian` tailoring, served by my_collation_any_uca_handler. */
6239 CHARSET_INFO my_charset_utf8_slovenian_uca_ci = {
6240 196, /* number */
6241 0, /* primary_number */
6242 0, /* binary_number */
6243 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6244 "utf8mb3", /* csname */
6245 "utf8mb3_slovenian_ci", /* m_coll_name */
6246 "UTF-8 Unicode", /* comment */
6247 slovenian, /* tailoring */
6248 nullptr, /* coll_param */
6249 ctype_utf8, /* ctype */
6250 nullptr, /* to_lower */
6251 nullptr, /* to_upper */
6252 nullptr, /* sort_order */
6253 nullptr, /* uca */
6254 nullptr, /* tab_to_uni */
6255 nullptr, /* tab_from_uni */
6256 &my_unicase_default, /* caseinfo */
6257 nullptr, /* state_map */
6258 nullptr, /* ident_map */
6259 8, /* strxfrm_multiply */
6260 1, /* caseup_multiply */
6261 1, /* casedn_multiply */
6262 1, /* mbminlen */
6263 3, /* mbmaxlen */
6264 1, /* mbmaxlenlen */
6265 9, /* min_sort_char */
6266 0xFFFF, /* max_sort_char */
6267 ' ', /* pad_char */
6268 false, /* escape_with_backslash_is_dangerous */
6269 1, /* levels_for_compare */
6270 &my_charset_utf8_handler, /* cset */
6271 &my_collation_any_uca_handler, /* coll */
6272 PAD_SPACE}; /* pad_attribute */
6273
/* utf8mb3_polish_ci (number 197): compiled-in utf8mb3 collation built from
   the `polish` tailoring, served by my_collation_any_uca_handler. */
6274 CHARSET_INFO my_charset_utf8_polish_uca_ci = {
6275 197, /* number */
6276 0, /* primary_number */
6277 0, /* binary_number */
6278 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6279 "utf8mb3", /* csname */
6280 "utf8mb3_polish_ci", /* m_coll_name */
6281 "UTF-8 Unicode", /* comment */
6282 polish, /* tailoring */
6283 nullptr, /* coll_param */
6284 ctype_utf8, /* ctype */
6285 nullptr, /* to_lower */
6286 nullptr, /* to_upper */
6287 nullptr, /* sort_order */
6288 nullptr, /* uca */
6289 nullptr, /* tab_to_uni */
6290 nullptr, /* tab_from_uni */
6291 &my_unicase_default, /* caseinfo */
6292 nullptr, /* state_map */
6293 nullptr, /* ident_map */
6294 8, /* strxfrm_multiply */
6295 1, /* caseup_multiply */
6296 1, /* casedn_multiply */
6297 1, /* mbminlen */
6298 3, /* mbmaxlen */
6299 1, /* mbmaxlenlen */
6300 9, /* min_sort_char */
6301 0xFFFF, /* max_sort_char */
6302 ' ', /* pad_char */
6303 false, /* escape_with_backslash_is_dangerous */
6304 1, /* levels_for_compare */
6305 &my_charset_utf8_handler, /* cset */
6306 &my_collation_any_uca_handler, /* coll */
6307 PAD_SPACE}; /* pad_attribute */
6308
/* utf8mb3_estonian_ci (number 198): compiled-in utf8mb3 collation built from
   the `estonian` tailoring, served by my_collation_any_uca_handler. */
6309 CHARSET_INFO my_charset_utf8_estonian_uca_ci = {
6310 198, /* number */
6311 0, /* primary_number */
6312 0, /* binary_number */
6313 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6314 "utf8mb3", /* csname */
6315 "utf8mb3_estonian_ci", /* m_coll_name */
6316 "UTF-8 Unicode", /* comment */
6317 estonian, /* tailoring */
6318 nullptr, /* coll_param */
6319 ctype_utf8, /* ctype */
6320 nullptr, /* to_lower */
6321 nullptr, /* to_upper */
6322 nullptr, /* sort_order */
6323 nullptr, /* uca */
6324 nullptr, /* tab_to_uni */
6325 nullptr, /* tab_from_uni */
6326 &my_unicase_default, /* caseinfo */
6327 nullptr, /* state_map */
6328 nullptr, /* ident_map */
6329 8, /* strxfrm_multiply */
6330 1, /* caseup_multiply */
6331 1, /* casedn_multiply */
6332 1, /* mbminlen */
6333 3, /* mbmaxlen */
6334 1, /* mbmaxlenlen */
6335 9, /* min_sort_char */
6336 0xFFFF, /* max_sort_char */
6337 ' ', /* pad_char */
6338 false, /* escape_with_backslash_is_dangerous */
6339 1, /* levels_for_compare */
6340 &my_charset_utf8_handler, /* cset */
6341 &my_collation_any_uca_handler, /* coll */
6342 PAD_SPACE}; /* pad_attribute */
6343
/* utf8mb3_spanish_ci (number 199): compiled-in utf8mb3 collation built from
   the `spanish` tailoring, served by my_collation_any_uca_handler. */
6344 CHARSET_INFO my_charset_utf8_spanish_uca_ci = {
6345 199, /* number */
6346 0, /* primary_number */
6347 0, /* binary_number */
6348 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6349 "utf8mb3", /* csname */
6350 "utf8mb3_spanish_ci", /* m_coll_name */
6351 "UTF-8 Unicode", /* comment */
6352 spanish, /* tailoring */
6353 nullptr, /* coll_param */
6354 ctype_utf8, /* ctype */
6355 nullptr, /* to_lower */
6356 nullptr, /* to_upper */
6357 nullptr, /* sort_order */
6358 nullptr, /* uca */
6359 nullptr, /* tab_to_uni */
6360 nullptr, /* tab_from_uni */
6361 &my_unicase_default, /* caseinfo */
6362 nullptr, /* state_map */
6363 nullptr, /* ident_map */
6364 8, /* strxfrm_multiply */
6365 1, /* caseup_multiply */
6366 1, /* casedn_multiply */
6367 1, /* mbminlen */
6368 3, /* mbmaxlen */
6369 1, /* mbmaxlenlen */
6370 9, /* min_sort_char */
6371 0xFFFF, /* max_sort_char */
6372 ' ', /* pad_char */
6373 false, /* escape_with_backslash_is_dangerous */
6374 1, /* levels_for_compare */
6375 &my_charset_utf8_handler, /* cset */
6376 &my_collation_any_uca_handler, /* coll */
6377 PAD_SPACE}; /* pad_attribute */
6378
/* utf8mb3_swedish_ci (number 200): compiled-in utf8mb3 collation built from
   the `swedish` tailoring, served by my_collation_any_uca_handler. */
6379 CHARSET_INFO my_charset_utf8_swedish_uca_ci = {
6380 200, /* number */
6381 0, /* primary_number */
6382 0, /* binary_number */
6383 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6384 "utf8mb3", /* csname */
6385 "utf8mb3_swedish_ci", /* m_coll_name */
6386 "UTF-8 Unicode", /* comment */
6387 swedish, /* tailoring */
6388 nullptr, /* coll_param */
6389 ctype_utf8, /* ctype */
6390 nullptr, /* to_lower */
6391 nullptr, /* to_upper */
6392 nullptr, /* sort_order */
6393 nullptr, /* uca */
6394 nullptr, /* tab_to_uni */
6395 nullptr, /* tab_from_uni */
6396 &my_unicase_default, /* caseinfo */
6397 nullptr, /* state_map */
6398 nullptr, /* ident_map */
6399 8, /* strxfrm_multiply */
6400 1, /* caseup_multiply */
6401 1, /* casedn_multiply */
6402 1, /* mbminlen */
6403 3, /* mbmaxlen */
6404 1, /* mbmaxlenlen */
6405 9, /* min_sort_char */
6406 0xFFFF, /* max_sort_char */
6407 ' ', /* pad_char */
6408 false, /* escape_with_backslash_is_dangerous */
6409 1, /* levels_for_compare */
6410 &my_charset_utf8_handler, /* cset */
6411 &my_collation_any_uca_handler, /* coll */
6412 PAD_SPACE}; /* pad_attribute */
6413
/* utf8mb3_turkish_ci (number 201): compiled-in utf8mb3 collation built from
   the `turkish` tailoring, served by my_collation_any_uca_handler. Unlike its
   siblings it uses my_unicase_turkish and case multipliers of 2. */
6414 CHARSET_INFO my_charset_utf8_turkish_uca_ci = {
6415 201, /* number */
6416 0, /* primary_number */
6417 0, /* binary_number */
6418 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6419 "utf8mb3", /* csname */
6420 "utf8mb3_turkish_ci", /* m_coll_name */
6421 "UTF-8 Unicode", /* comment */
6422 turkish, /* tailoring */
6423 nullptr, /* coll_param */
6424 ctype_utf8, /* ctype */
6425 nullptr, /* to_lower */
6426 nullptr, /* to_upper */
6427 nullptr, /* sort_order */
6428 nullptr, /* uca */
6429 nullptr, /* tab_to_uni */
6430 nullptr, /* tab_from_uni */
6431 &my_unicase_turkish, /* caseinfo: Turkish-specific table instead of my_unicase_default */
6432 nullptr, /* state_map */
6433 nullptr, /* ident_map */
6434 8, /* strxfrm_multiply */
6435 2, /* caseup_multiply: 2 here vs 1 elsewhere - presumably Turkish upper-casing can lengthen the encoding; confirm */
6436 2, /* casedn_multiply: 2 here vs 1 elsewhere - presumably Turkish lower-casing can lengthen the encoding; confirm */
6437 1, /* mbminlen */
6438 3, /* mbmaxlen */
6439 1, /* mbmaxlenlen */
6440 9, /* min_sort_char */
6441 0xFFFF, /* max_sort_char */
6442 ' ', /* pad_char */
6443 false, /* escape_with_backslash_is_dangerous */
6444 1, /* levels_for_compare */
6445 &my_charset_utf8_handler, /* cset */
6446 &my_collation_any_uca_handler, /* coll */
6447 PAD_SPACE}; /* pad_attribute */
6448
/* utf8mb3_czech_ci (number 202): compiled-in utf8mb3 collation built from
   the `czech` tailoring, served by my_collation_any_uca_handler. */
6449 CHARSET_INFO my_charset_utf8_czech_uca_ci = {
6450 202, /* number */
6451 0, /* primary_number */
6452 0, /* binary_number */
6453 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6454 "utf8mb3", /* csname */
6455 "utf8mb3_czech_ci", /* m_coll_name */
6456 "UTF-8 Unicode", /* comment */
6457 czech, /* tailoring */
6458 nullptr, /* coll_param */
6459 ctype_utf8, /* ctype */
6460 nullptr, /* to_lower */
6461 nullptr, /* to_upper */
6462 nullptr, /* sort_order */
6463 nullptr, /* uca */
6464 nullptr, /* tab_to_uni */
6465 nullptr, /* tab_from_uni */
6466 &my_unicase_default, /* caseinfo */
6467 nullptr, /* state_map */
6468 nullptr, /* ident_map */
6469 8, /* strxfrm_multiply */
6470 1, /* caseup_multiply */
6471 1, /* casedn_multiply */
6472 1, /* mbminlen */
6473 3, /* mbmaxlen */
6474 1, /* mbmaxlenlen */
6475 9, /* min_sort_char */
6476 0xFFFF, /* max_sort_char */
6477 ' ', /* pad_char */
6478 false, /* escape_with_backslash_is_dangerous */
6479 1, /* levels_for_compare */
6480 &my_charset_utf8_handler, /* cset */
6481 &my_collation_any_uca_handler, /* coll */
6482 PAD_SPACE}; /* pad_attribute */
6483
/* utf8mb3_danish_ci (number 203): compiled-in utf8mb3 collation built from
   the `danish` tailoring, served by my_collation_any_uca_handler. */
6484 CHARSET_INFO my_charset_utf8_danish_uca_ci = {
6485 203, /* number */
6486 0, /* primary_number */
6487 0, /* binary_number */
6488 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6489 "utf8mb3", /* csname */
6490 "utf8mb3_danish_ci", /* m_coll_name */
6491 "UTF-8 Unicode", /* comment */
6492 danish, /* tailoring */
6493 nullptr, /* coll_param */
6494 ctype_utf8, /* ctype */
6495 nullptr, /* to_lower */
6496 nullptr, /* to_upper */
6497 nullptr, /* sort_order */
6498 nullptr, /* uca */
6499 nullptr, /* tab_to_uni */
6500 nullptr, /* tab_from_uni */
6501 &my_unicase_default, /* caseinfo */
6502 nullptr, /* state_map */
6503 nullptr, /* ident_map */
6504 8, /* strxfrm_multiply */
6505 1, /* caseup_multiply */
6506 1, /* casedn_multiply */
6507 1, /* mbminlen */
6508 3, /* mbmaxlen */
6509 1, /* mbmaxlenlen */
6510 9, /* min_sort_char */
6511 0xFFFF, /* max_sort_char */
6512 ' ', /* pad_char */
6513 false, /* escape_with_backslash_is_dangerous */
6514 1, /* levels_for_compare */
6515 &my_charset_utf8_handler, /* cset */
6516 &my_collation_any_uca_handler, /* coll */
6517 PAD_SPACE}; /* pad_attribute */
6518
/* utf8mb3_lithuanian_ci (number 204): compiled-in utf8mb3 collation built from
   the `lithuanian` tailoring, served by my_collation_any_uca_handler. */
6519 CHARSET_INFO my_charset_utf8_lithuanian_uca_ci = {
6520 204, /* number */
6521 0, /* primary_number */
6522 0, /* binary_number */
6523 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6524 "utf8mb3", /* csname */
6525 "utf8mb3_lithuanian_ci", /* m_coll_name */
6526 "UTF-8 Unicode", /* comment */
6527 lithuanian, /* tailoring */
6528 nullptr, /* coll_param */
6529 ctype_utf8, /* ctype */
6530 nullptr, /* to_lower */
6531 nullptr, /* to_upper */
6532 nullptr, /* sort_order */
6533 nullptr, /* uca */
6534 nullptr, /* tab_to_uni */
6535 nullptr, /* tab_from_uni */
6536 &my_unicase_default, /* caseinfo */
6537 nullptr, /* state_map */
6538 nullptr, /* ident_map */
6539 8, /* strxfrm_multiply */
6540 1, /* caseup_multiply */
6541 1, /* casedn_multiply */
6542 1, /* mbminlen */
6543 3, /* mbmaxlen */
6544 1, /* mbmaxlenlen */
6545 9, /* min_sort_char */
6546 0xFFFF, /* max_sort_char */
6547 ' ', /* pad_char */
6548 false, /* escape_with_backslash_is_dangerous */
6549 1, /* levels_for_compare */
6550 &my_charset_utf8_handler, /* cset */
6551 &my_collation_any_uca_handler, /* coll */
6552 PAD_SPACE}; /* pad_attribute */
6553
/* utf8mb3_slovak_ci (number 205): compiled-in utf8mb3 collation built from
   the `slovak` tailoring, served by my_collation_any_uca_handler. */
6554 CHARSET_INFO my_charset_utf8_slovak_uca_ci = {
6555 205, /* number */
6556 0, /* primary_number */
6557 0, /* binary_number */
6558 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6559 "utf8mb3", /* csname */
6560 "utf8mb3_slovak_ci", /* m_coll_name */
6561 "UTF-8 Unicode", /* comment */
6562 slovak, /* tailoring */
6563 nullptr, /* coll_param */
6564 ctype_utf8, /* ctype */
6565 nullptr, /* to_lower */
6566 nullptr, /* to_upper */
6567 nullptr, /* sort_order */
6568 nullptr, /* uca */
6569 nullptr, /* tab_to_uni */
6570 nullptr, /* tab_from_uni */
6571 &my_unicase_default, /* caseinfo */
6572 nullptr, /* state_map */
6573 nullptr, /* ident_map */
6574 8, /* strxfrm_multiply */
6575 1, /* caseup_multiply */
6576 1, /* casedn_multiply */
6577 1, /* mbminlen */
6578 3, /* mbmaxlen */
6579 1, /* mbmaxlenlen */
6580 9, /* min_sort_char */
6581 0xFFFF, /* max_sort_char */
6582 ' ', /* pad_char */
6583 false, /* escape_with_backslash_is_dangerous */
6584 1, /* levels_for_compare */
6585 &my_charset_utf8_handler, /* cset */
6586 &my_collation_any_uca_handler, /* coll */
6587 PAD_SPACE}; /* pad_attribute */
6588
/* utf8mb3_spanish2_ci (number 206): compiled-in utf8mb3 collation built from
   the `spanish2` tailoring, served by my_collation_any_uca_handler. */
6589 CHARSET_INFO my_charset_utf8_spanish2_uca_ci = {
6590 206, /* number */
6591 0, /* primary_number */
6592 0, /* binary_number */
6593 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6594 "utf8mb3", /* csname */
6595 "utf8mb3_spanish2_ci", /* m_coll_name */
6596 "UTF-8 Unicode", /* comment */
6597 spanish2, /* tailoring */
6598 nullptr, /* coll_param */
6599 ctype_utf8, /* ctype */
6600 nullptr, /* to_lower */
6601 nullptr, /* to_upper */
6602 nullptr, /* sort_order */
6603 nullptr, /* uca */
6604 nullptr, /* tab_to_uni */
6605 nullptr, /* tab_from_uni */
6606 &my_unicase_default, /* caseinfo */
6607 nullptr, /* state_map */
6608 nullptr, /* ident_map */
6609 8, /* strxfrm_multiply */
6610 1, /* caseup_multiply */
6611 1, /* casedn_multiply */
6612 1, /* mbminlen */
6613 3, /* mbmaxlen */
6614 1, /* mbmaxlenlen */
6615 9, /* min_sort_char */
6616 0xFFFF, /* max_sort_char */
6617 ' ', /* pad_char */
6618 false, /* escape_with_backslash_is_dangerous */
6619 1, /* levels_for_compare */
6620 &my_charset_utf8_handler, /* cset */
6621 &my_collation_any_uca_handler, /* coll */
6622 PAD_SPACE}; /* pad_attribute */
6623
/* utf8mb3_roman_ci (number 207): compiled-in utf8mb3 collation built from
   the `roman` tailoring, served by my_collation_any_uca_handler. */
6624 CHARSET_INFO my_charset_utf8_roman_uca_ci = {
6625 207, /* number */
6626 0, /* primary_number */
6627 0, /* binary_number */
6628 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6629 "utf8mb3", /* csname */
6630 "utf8mb3_roman_ci", /* m_coll_name */
6631 "UTF-8 Unicode", /* comment */
6632 roman, /* tailoring */
6633 nullptr, /* coll_param */
6634 ctype_utf8, /* ctype */
6635 nullptr, /* to_lower */
6636 nullptr, /* to_upper */
6637 nullptr, /* sort_order */
6638 nullptr, /* uca */
6639 nullptr, /* tab_to_uni */
6640 nullptr, /* tab_from_uni */
6641 &my_unicase_default, /* caseinfo */
6642 nullptr, /* state_map */
6643 nullptr, /* ident_map */
6644 8, /* strxfrm_multiply */
6645 1, /* caseup_multiply */
6646 1, /* casedn_multiply */
6647 1, /* mbminlen */
6648 3, /* mbmaxlen */
6649 1, /* mbmaxlenlen */
6650 9, /* min_sort_char */
6651 0xFFFF, /* max_sort_char */
6652 ' ', /* pad_char */
6653 false, /* escape_with_backslash_is_dangerous */
6654 1, /* levels_for_compare */
6655 &my_charset_utf8_handler, /* cset */
6656 &my_collation_any_uca_handler, /* coll */
6657 PAD_SPACE}; /* pad_attribute */
6658
/* utf8mb3_persian_ci (number 208): compiled-in utf8mb3 collation built from
   the `persian` tailoring, served by my_collation_any_uca_handler. */
6659 CHARSET_INFO my_charset_utf8_persian_uca_ci = {
6660 208, /* number */
6661 0, /* primary_number */
6662 0, /* binary_number */
6663 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6664 "utf8mb3", /* csname */
6665 "utf8mb3_persian_ci", /* m_coll_name */
6666 "UTF-8 Unicode", /* comment */
6667 persian, /* tailoring */
6668 nullptr, /* coll_param */
6669 ctype_utf8, /* ctype */
6670 nullptr, /* to_lower */
6671 nullptr, /* to_upper */
6672 nullptr, /* sort_order */
6673 nullptr, /* uca */
6674 nullptr, /* tab_to_uni */
6675 nullptr, /* tab_from_uni */
6676 &my_unicase_default, /* caseinfo */
6677 nullptr, /* state_map */
6678 nullptr, /* ident_map */
6679 8, /* strxfrm_multiply */
6680 1, /* caseup_multiply */
6681 1, /* casedn_multiply */
6682 1, /* mbminlen */
6683 3, /* mbmaxlen */
6684 1, /* mbmaxlenlen */
6685 9, /* min_sort_char */
6686 0xFFFF, /* max_sort_char */
6687 ' ', /* pad_char */
6688 false, /* escape_with_backslash_is_dangerous */
6689 1, /* levels_for_compare */
6690 &my_charset_utf8_handler, /* cset */
6691 &my_collation_any_uca_handler, /* coll */
6692 PAD_SPACE}; /* pad_attribute */
6693
/* utf8mb3_esperanto_ci (number 209): compiled-in utf8mb3 collation built from
   the `esperanto` tailoring, served by my_collation_any_uca_handler. */
6694 CHARSET_INFO my_charset_utf8_esperanto_uca_ci = {
6695 209, /* number */
6696 0, /* primary_number */
6697 0, /* binary_number */
6698 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6699 "utf8mb3", /* csname */
6700 "utf8mb3_esperanto_ci", /* m_coll_name */
6701 "UTF-8 Unicode", /* comment */
6702 esperanto, /* tailoring */
6703 nullptr, /* coll_param */
6704 ctype_utf8, /* ctype */
6705 nullptr, /* to_lower */
6706 nullptr, /* to_upper */
6707 nullptr, /* sort_order */
6708 nullptr, /* uca */
6709 nullptr, /* tab_to_uni */
6710 nullptr, /* tab_from_uni */
6711 &my_unicase_default, /* caseinfo */
6712 nullptr, /* state_map */
6713 nullptr, /* ident_map */
6714 8, /* strxfrm_multiply */
6715 1, /* caseup_multiply */
6716 1, /* casedn_multiply */
6717 1, /* mbminlen */
6718 3, /* mbmaxlen */
6719 1, /* mbmaxlenlen */
6720 9, /* min_sort_char */
6721 0xFFFF, /* max_sort_char */
6722 ' ', /* pad_char */
6723 false, /* escape_with_backslash_is_dangerous */
6724 1, /* levels_for_compare */
6725 &my_charset_utf8_handler, /* cset */
6726 &my_collation_any_uca_handler, /* coll */
6727 PAD_SPACE}; /* pad_attribute */
6728
/* utf8mb3_hungarian_ci (number 210): compiled-in utf8mb3 collation built from
   the `hungarian` tailoring, served by my_collation_any_uca_handler. */
6729 CHARSET_INFO my_charset_utf8_hungarian_uca_ci = {
6730 210, /* number */
6731 0, /* primary_number */
6732 0, /* binary_number */
6733 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6734 "utf8mb3", /* csname */
6735 "utf8mb3_hungarian_ci", /* m_coll_name */
6736 "UTF-8 Unicode", /* comment */
6737 hungarian, /* tailoring */
6738 nullptr, /* coll_param */
6739 ctype_utf8, /* ctype */
6740 nullptr, /* to_lower */
6741 nullptr, /* to_upper */
6742 nullptr, /* sort_order */
6743 nullptr, /* uca */
6744 nullptr, /* tab_to_uni */
6745 nullptr, /* tab_from_uni */
6746 &my_unicase_default, /* caseinfo */
6747 nullptr, /* state_map */
6748 nullptr, /* ident_map */
6749 8, /* strxfrm_multiply */
6750 1, /* caseup_multiply */
6751 1, /* casedn_multiply */
6752 1, /* mbminlen */
6753 3, /* mbmaxlen */
6754 1, /* mbmaxlenlen */
6755 9, /* min_sort_char */
6756 0xFFFF, /* max_sort_char */
6757 ' ', /* pad_char */
6758 false, /* escape_with_backslash_is_dangerous */
6759 1, /* levels_for_compare */
6760 &my_charset_utf8_handler, /* cset */
6761 &my_collation_any_uca_handler, /* coll */
6762 PAD_SPACE}; /* pad_attribute */
6763
/* utf8mb3_sinhala_ci (number 211): compiled-in utf8mb3 collation built from
   the `sinhala` tailoring, served by my_collation_any_uca_handler. */
6764 CHARSET_INFO my_charset_utf8_sinhala_uca_ci = {
6765 211, /* number */
6766 0, /* primary_number */
6767 0, /* binary_number */
6768 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6769 "utf8mb3", /* csname */
6770 "utf8mb3_sinhala_ci", /* m_coll_name */
6771 "UTF-8 Unicode", /* comment */
6772 sinhala, /* tailoring */
6773 nullptr, /* coll_param */
6774 ctype_utf8, /* ctype */
6775 nullptr, /* to_lower */
6776 nullptr, /* to_upper */
6777 nullptr, /* sort_order */
6778 nullptr, /* uca */
6779 nullptr, /* tab_to_uni */
6780 nullptr, /* tab_from_uni */
6781 &my_unicase_default, /* caseinfo */
6782 nullptr, /* state_map */
6783 nullptr, /* ident_map */
6784 8, /* strxfrm_multiply */
6785 1, /* caseup_multiply */
6786 1, /* casedn_multiply */
6787 1, /* mbminlen */
6788 3, /* mbmaxlen */
6789 1, /* mbmaxlenlen */
6790 9, /* min_sort_char */
6791 0xFFFF, /* max_sort_char */
6792 ' ', /* pad_char */
6793 false, /* escape_with_backslash_is_dangerous */
6794 1, /* levels_for_compare */
6795 &my_charset_utf8_handler, /* cset */
6796 &my_collation_any_uca_handler, /* coll */
6797 PAD_SPACE}; /* pad_attribute */
6798
/* utf8mb3_german2_ci (number 212): compiled-in utf8mb3 collation built from
   the `german2` tailoring, served by my_collation_any_uca_handler. */
6799 CHARSET_INFO my_charset_utf8_german2_uca_ci = {
6800 212, /* number */
6801 0, /* primary_number */
6802 0, /* binary_number */
6803 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6804 "utf8mb3", /* csname */
6805 "utf8mb3_german2_ci", /* m_coll_name */
6806 "UTF-8 Unicode", /* comment */
6807 german2, /* tailoring */
6808 nullptr, /* coll_param */
6809 ctype_utf8, /* ctype */
6810 nullptr, /* to_lower */
6811 nullptr, /* to_upper */
6812 nullptr, /* sort_order */
6813 nullptr, /* uca */
6814 nullptr, /* tab_to_uni */
6815 nullptr, /* tab_from_uni */
6816 &my_unicase_default, /* caseinfo */
6817 nullptr, /* state_map */
6818 nullptr, /* ident_map */
6819 8, /* strxfrm_multiply */
6820 1, /* caseup_multiply */
6821 1, /* casedn_multiply */
6822 1, /* mbminlen */
6823 3, /* mbmaxlen */
6824 1, /* mbmaxlenlen */
6825 9, /* min_sort_char */
6826 0xFFFF, /* max_sort_char */
6827 ' ', /* pad_char */
6828 false, /* escape_with_backslash_is_dangerous */
6829 1, /* levels_for_compare */
6830 &my_charset_utf8_handler, /* cset */
6831 &my_collation_any_uca_handler, /* coll */
6832 PAD_SPACE}; /* pad_attribute */
6833
/* utf8mb3_croatian_ci (number 213): compiled-in utf8mb3 collation built from
   the `croatian` tailoring, served by my_collation_any_uca_handler. */
6834 CHARSET_INFO my_charset_utf8_croatian_uca_ci = {
6835 213, /* number */
6836 0, /* primary_number */
6837 0, /* binary_number */
6838 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6839 "utf8mb3", /* csname */
6840 "utf8mb3_croatian_ci", /* m_coll_name */
6841 "UTF-8 Unicode", /* comment */
6842 croatian, /* tailoring */
6843 nullptr, /* coll_param */
6844 ctype_utf8, /* ctype */
6845 nullptr, /* to_lower */
6846 nullptr, /* to_upper */
6847 nullptr, /* sort_order */
6848 nullptr, /* uca */
6849 nullptr, /* tab_to_uni */
6850 nullptr, /* tab_from_uni */
6851 &my_unicase_default, /* caseinfo */
6852 nullptr, /* state_map */
6853 nullptr, /* ident_map */
6854 8, /* strxfrm_multiply */
6855 1, /* caseup_multiply */
6856 1, /* casedn_multiply */
6857 1, /* mbminlen */
6858 3, /* mbmaxlen */
6859 1, /* mbmaxlenlen */
6860 9, /* min_sort_char */
6861 0xFFFF, /* max_sort_char */
6862 ' ', /* pad_char */
6863 false, /* escape_with_backslash_is_dangerous */
6864 1, /* levels_for_compare */
6865 &my_charset_utf8_handler, /* cset */
6866 &my_collation_any_uca_handler, /* coll */
6867 PAD_SPACE}; /* pad_attribute */
6868
/* utf8mb3_unicode_520_ci (number 214): compiled-in utf8mb3 collation with an
   empty tailoring, served by my_collation_any_uca_handler. Unlike its
   siblings it sets uca to &my_uca_v520 and uses my_unicase_unicode520. */
6869 CHARSET_INFO my_charset_utf8_unicode_520_ci = {
6870 214, /* number */
6871 0, /* primary_number */
6872 0, /* binary_number */
6873 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6874 "utf8mb3", /* csname */
6875 "utf8mb3_unicode_520_ci", /* m_coll_name */
6876 "UTF-8 Unicode", /* comment */
6877 "", /* tailoring */
6878 nullptr, /* coll_param */
6879 ctype_utf8, /* ctype */
6880 nullptr, /* to_lower */
6881 nullptr, /* to_upper */
6882 nullptr, /* sort_order */
6883 &my_uca_v520, /* uca: set explicitly here; siblings leave it null (presumably defaulted during init - confirm) */
6884 nullptr, /* tab_to_uni */
6885 nullptr, /* tab_from_uni */
6886 &my_unicase_unicode520, /* caseinfo: Unicode 5.2.0 table instead of my_unicase_default */
6887 nullptr, /* state_map */
6888 nullptr, /* ident_map */
6889 8, /* strxfrm_multiply */
6890 1, /* caseup_multiply */
6891 1, /* casedn_multiply */
6892 1, /* mbminlen */
6893 3, /* mbmaxlen */
6894 1, /* mbmaxlenlen */
6895 9, /* min_sort_char */
6896 0xFFFF, /* max_sort_char */
6897 ' ', /* pad_char */
6898 false, /* escape_with_backslash_is_dangerous */
6899 1, /* levels_for_compare */
6900 &my_charset_utf8_handler, /* cset */
6901 &my_collation_any_uca_handler, /* coll */
6902 PAD_SPACE}; /* pad_attribute */
6903
/* utf8mb3_vietnamese_ci (number 215): compiled-in utf8mb3 collation built from
   the `vietnamese` tailoring, served by my_collation_any_uca_handler. */
6904 CHARSET_INFO my_charset_utf8_vietnamese_ci = {
6905 215, /* number */
6906 0, /* primary_number */
6907 0, /* binary_number */
6908 MY_CS_UTF8MB3_UCA_FLAGS, /* state (flag bits) */
6909 "utf8mb3", /* csname */
6910 "utf8mb3_vietnamese_ci", /* m_coll_name */
6911 "UTF-8 Unicode", /* comment */
6912 vietnamese, /* tailoring */
6913 nullptr, /* coll_param */
6914 ctype_utf8, /* ctype */
6915 nullptr, /* to_lower */
6916 nullptr, /* to_upper */
6917 nullptr, /* sort_order */
6918 nullptr, /* uca */
6919 nullptr, /* tab_to_uni */
6920 nullptr, /* tab_from_uni */
6921 &my_unicase_default, /* caseinfo */
6922 nullptr, /* state_map */
6923 nullptr, /* ident_map */
6924 8, /* strxfrm_multiply */
6925 1, /* caseup_multiply */
6926 1, /* casedn_multiply */
6927 1, /* mbminlen */
6928 3, /* mbmaxlen */
6929 1, /* mbmaxlenlen */
6930 9, /* min_sort_char */
6931 0xFFFF, /* max_sort_char */
6932 ' ', /* pad_char */
6933 false, /* escape_with_backslash_is_dangerous */
6934 1, /* levels_for_compare */
6935 &my_charset_utf8_handler, /* cset */
6936 &my_collation_any_uca_handler, /* coll */
6937 PAD_SPACE}; /* pad_attribute */
6938
6939 extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; /* only declared here; the utf8mb4 collations below use it as their charset handler */
6940
/* State flag combination passed by every utf8mb4 UCA collation defined below;
   it is the utf8mb3 combination plus MY_CS_UNICODE_SUPPLEMENT. */
6941 #define MY_CS_UTF8MB4_UCA_FLAGS \
6942 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT)
6943
/* <MY_UTF8MB4>_unicode_ci (number 224): compiled-in 4-byte UTF-8 collation
   with an empty tailoring, served by my_collation_any_uca_handler. */
6944 CHARSET_INFO my_charset_utf8mb4_unicode_ci = {
6945 224, /* number */
6946 0, /* primary_number */
6947 0, /* binary_number */
6948 MY_CS_UTF8MB4_UCA_FLAGS, /* state (flag bits) */
6949 MY_UTF8MB4, /* csname */
6950 MY_UTF8MB4 "_unicode_ci", /* m_coll_name */
6951 "UTF-8 Unicode", /* comment */
6952 "", /* tailoring */
6953 nullptr, /* coll_param */
6954 ctype_utf8, /* ctype */
6955 nullptr, /* to_lower */
6956 nullptr, /* to_upper */
6957 nullptr, /* sort_order */
6958 nullptr, /* uca */
6959 nullptr, /* tab_to_uni */
6960 nullptr, /* tab_from_uni */
6961 &my_unicase_default, /* caseinfo */
6962 nullptr, /* state_map */
6963 nullptr, /* ident_map */
6964 8, /* strxfrm_multiply */
6965 1, /* caseup_multiply */
6966 1, /* casedn_multiply */
6967 1, /* mbminlen */
6968 4, /* mbmaxlen */
6969 1, /* mbmaxlenlen */
6970 9, /* min_sort_char */
6971 0xFFFF, /* max_sort_char */
6972 ' ', /* pad_char */
6973 false, /* escape_with_backslash_is_dangerous */
6974 1, /* levels_for_compare */
6975 &my_charset_utf8mb4_handler, /* cset */
6976 &my_collation_any_uca_handler, /* coll */
6977 PAD_SPACE}; /* pad_attribute */
6978
/* <MY_UTF8MB4>_icelandic_ci (number 225): compiled-in 4-byte UTF-8 collation
   built from the `icelandic` tailoring, served by my_collation_any_uca_handler. */
6979 CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci = {
6980 225, /* number */
6981 0, /* primary_number */
6982 0, /* binary_number */
6983 MY_CS_UTF8MB4_UCA_FLAGS, /* state (flag bits) */
6984 MY_UTF8MB4, /* csname */
6985 MY_UTF8MB4 "_icelandic_ci", /* m_coll_name */
6986 "UTF-8 Unicode", /* comment */
6987 icelandic, /* tailoring */
6988 nullptr, /* coll_param */
6989 ctype_utf8, /* ctype */
6990 nullptr, /* to_lower */
6991 nullptr, /* to_upper */
6992 nullptr, /* sort_order */
6993 nullptr, /* uca */
6994 nullptr, /* tab_to_uni */
6995 nullptr, /* tab_from_uni */
6996 &my_unicase_default, /* caseinfo */
6997 nullptr, /* state_map */
6998 nullptr, /* ident_map */
6999 8, /* strxfrm_multiply */
7000 1, /* caseup_multiply */
7001 1, /* casedn_multiply */
7002 1, /* mbminlen */
7003 4, /* mbmaxlen */
7004 1, /* mbmaxlenlen */
7005 9, /* min_sort_char */
7006 0xFFFF, /* max_sort_char */
7007 ' ', /* pad_char */
7008 false, /* escape_with_backslash_is_dangerous */
7009 1, /* levels_for_compare */
7010 &my_charset_utf8mb4_handler, /* cset */
7011 &my_collation_any_uca_handler, /* coll */
7012 PAD_SPACE}; /* pad_attribute */
7013
/* <MY_UTF8MB4>_latvian_ci (number 226): compiled-in 4-byte UTF-8 collation
   built from the `latvian` tailoring, served by my_collation_any_uca_handler. */
7014 CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci = {
7015 226, /* number */
7016 0, /* primary_number */
7017 0, /* binary_number */
7018 MY_CS_UTF8MB4_UCA_FLAGS, /* state (flag bits) */
7019 MY_UTF8MB4, /* csname */
7020 MY_UTF8MB4 "_latvian_ci", /* m_coll_name */
7021 "UTF-8 Unicode", /* comment */
7022 latvian, /* tailoring */
7023 nullptr, /* coll_param */
7024 ctype_utf8, /* ctype */
7025 nullptr, /* to_lower */
7026 nullptr, /* to_upper */
7027 nullptr, /* sort_order */
7028 nullptr, /* uca */
7029 nullptr, /* tab_to_uni */
7030 nullptr, /* tab_from_uni */
7031 &my_unicase_default, /* caseinfo */
7032 nullptr, /* state_map */
7033 nullptr, /* ident_map */
7034 8, /* strxfrm_multiply */
7035 1, /* caseup_multiply */
7036 1, /* casedn_multiply */
7037 1, /* mbminlen */
7038 4, /* mbmaxlen */
7039 1, /* mbmaxlenlen */
7040 9, /* min_sort_char */
7041 0xFFFF, /* max_sort_char */
7042 ' ', /* pad_char */
7043 false, /* escape_with_backslash_is_dangerous */
7044 1, /* levels_for_compare */
7045 &my_charset_utf8mb4_handler, /* cset */
7046 &my_collation_any_uca_handler, /* coll */
7047 PAD_SPACE}; /* pad_attribute */
7048
/* <MY_UTF8MB4>_romanian_ci (number 227): compiled-in 4-byte UTF-8 collation
   built from the `romanian` tailoring, served by my_collation_any_uca_handler. */
7049 CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci = {
7050 227, /* number */
7051 0, /* primary_number */
7052 0, /* binary_number */
7053 MY_CS_UTF8MB4_UCA_FLAGS, /* state (flag bits) */
7054 MY_UTF8MB4, /* csname */
7055 MY_UTF8MB4 "_romanian_ci", /* m_coll_name */
7056 "UTF-8 Unicode", /* comment */
7057 romanian, /* tailoring */
7058 nullptr, /* coll_param */
7059 ctype_utf8, /* ctype */
7060 nullptr, /* to_lower */
7061 nullptr, /* to_upper */
7062 nullptr, /* sort_order */
7063 nullptr, /* uca */
7064 nullptr, /* tab_to_uni */
7065 nullptr, /* tab_from_uni */
7066 &my_unicase_default, /* caseinfo */
7067 nullptr, /* state_map */
7068 nullptr, /* ident_map */
7069 8, /* strxfrm_multiply */
7070 1, /* caseup_multiply */
7071 1, /* casedn_multiply */
7072 1, /* mbminlen */
7073 4, /* mbmaxlen */
7074 1, /* mbmaxlenlen */
7075 9, /* min_sort_char */
7076 0xFFFF, /* max_sort_char */
7077 ' ', /* pad_char */
7078 false, /* escape_with_backslash_is_dangerous */
7079 1, /* levels_for_compare */
7080 &my_charset_utf8mb4_handler, /* cset */
7081 &my_collation_any_uca_handler, /* coll */
7082 PAD_SPACE}; /* pad_attribute */
7083
/* <MY_UTF8MB4>_slovenian_ci (number 228): compiled-in 4-byte UTF-8 collation
   built from the `slovenian` tailoring, served by my_collation_any_uca_handler. */
7084 CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci = {
7085 228, /* number */
7086 0, /* primary_number */
7087 0, /* binary_number */
7088 MY_CS_UTF8MB4_UCA_FLAGS, /* state (flag bits) */
7089 MY_UTF8MB4, /* csname */
7090 MY_UTF8MB4 "_slovenian_ci", /* m_coll_name */
7091 "UTF-8 Unicode", /* comment */
7092 slovenian, /* tailoring */
7093 nullptr, /* coll_param */
7094 ctype_utf8, /* ctype */
7095 nullptr, /* to_lower */
7096 nullptr, /* to_upper */
7097 nullptr, /* sort_order */
7098 nullptr, /* uca */
7099 nullptr, /* tab_to_uni */
7100 nullptr, /* tab_from_uni */
7101 &my_unicase_default, /* caseinfo */
7102 nullptr, /* state_map */
7103 nullptr, /* ident_map */
7104 8, /* strxfrm_multiply */
7105 1, /* caseup_multiply */
7106 1, /* casedn_multiply */
7107 1, /* mbminlen */
7108 4, /* mbmaxlen */
7109 1, /* mbmaxlenlen */
7110 9, /* min_sort_char */
7111 0xFFFF, /* max_sort_char */
7112 ' ', /* pad_char */
7113 false, /* escape_with_backslash_is_dangerous */
7114 1, /* levels_for_compare */
7115 &my_charset_utf8mb4_handler, /* cset */
7116 &my_collation_any_uca_handler, /* coll */
7117 PAD_SPACE}; /* pad_attribute */
7118
/*
  Descriptor for the MY_UTF8MB4 "_polish_ci" collation: uses the
  `polish` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 229 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7119 CHARSET_INFO my_charset_utf8mb4_polish_uca_ci = {
7120 229,
7121 0,
7122 0, /* number */
7123 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7124 MY_UTF8MB4, /* csname */
7125 MY_UTF8MB4 "_polish_ci", /* m_coll_name */
7126 "UTF-8 Unicode", /* comment */
7127 polish, /* tailoring */
7128 nullptr, /* coll_param */
7129 ctype_utf8, /* ctype */
7130 nullptr, /* to_lower */
7131 nullptr, /* to_upper */
7132 nullptr, /* sort_order */
7133 nullptr, /* uca */
7134 nullptr, /* tab_to_uni */
7135 nullptr, /* tab_from_uni */
7136 &my_unicase_default, /* caseinfo */
7137 nullptr, /* state_map */
7138 nullptr, /* ident_map */
7139 8, /* strxfrm_multiply */
7140 1, /* caseup_multiply */
7141 1, /* casedn_multiply */
7142 1, /* mbminlen */
7143 4, /* mbmaxlen */
7144 1, /* mbmaxlenlen */
7145 9, /* min_sort_char */
7146 0xFFFF, /* max_sort_char */
7147 ' ', /* pad char */
7148 false, /* escape_with_backslash_is_dangerous */
7149 1, /* levels_for_compare */
7150 &my_charset_utf8mb4_handler, /* charset handler */
7151 &my_collation_any_uca_handler, /* collation handler */
7152 PAD_SPACE}; /* pad attribute */
7153
/*
  Descriptor for the MY_UTF8MB4 "_estonian_ci" collation: uses the
  `estonian` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 230 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7154 CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci = {
7155 230,
7156 0,
7157 0, /* number */
7158 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7159 MY_UTF8MB4, /* csname */
7160 MY_UTF8MB4 "_estonian_ci", /* m_coll_name */
7161 "UTF-8 Unicode", /* comment */
7162 estonian, /* tailoring */
7163 nullptr, /* coll_param */
7164 ctype_utf8, /* ctype */
7165 nullptr, /* to_lower */
7166 nullptr, /* to_upper */
7167 nullptr, /* sort_order */
7168 nullptr, /* uca */
7169 nullptr, /* tab_to_uni */
7170 nullptr, /* tab_from_uni */
7171 &my_unicase_default, /* caseinfo */
7172 nullptr, /* state_map */
7173 nullptr, /* ident_map */
7174 8, /* strxfrm_multiply */
7175 1, /* caseup_multiply */
7176 1, /* casedn_multiply */
7177 1, /* mbminlen */
7178 4, /* mbmaxlen */
7179 1, /* mbmaxlenlen */
7180 9, /* min_sort_char */
7181 0xFFFF, /* max_sort_char */
7182 ' ', /* pad char */
7183 false, /* escape_with_backslash_is_dangerous */
7184 1, /* levels_for_compare */
7185 &my_charset_utf8mb4_handler, /* charset handler */
7186 &my_collation_any_uca_handler, /* collation handler */
7187 PAD_SPACE}; /* pad attribute */
7188
/*
  Descriptor for the MY_UTF8MB4 "_spanish_ci" collation: uses the
  `spanish` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 231 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7189 CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci = {
7190 231,
7191 0,
7192 0, /* number */
7193 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7194 MY_UTF8MB4, /* csname */
7195 MY_UTF8MB4 "_spanish_ci", /* m_coll_name */
7196 "UTF-8 Unicode", /* comment */
7197 spanish, /* tailoring */
7198 nullptr, /* coll_param */
7199 ctype_utf8, /* ctype */
7200 nullptr, /* to_lower */
7201 nullptr, /* to_upper */
7202 nullptr, /* sort_order */
7203 nullptr, /* uca */
7204 nullptr, /* tab_to_uni */
7205 nullptr, /* tab_from_uni */
7206 &my_unicase_default, /* caseinfo */
7207 nullptr, /* state_map */
7208 nullptr, /* ident_map */
7209 8, /* strxfrm_multiply */
7210 1, /* caseup_multiply */
7211 1, /* casedn_multiply */
7212 1, /* mbminlen */
7213 4, /* mbmaxlen */
7214 1, /* mbmaxlenlen */
7215 9, /* min_sort_char */
7216 0xFFFF, /* max_sort_char */
7217 ' ', /* pad char */
7218 false, /* escape_with_backslash_is_dangerous */
7219 1, /* levels_for_compare */
7220 &my_charset_utf8mb4_handler, /* charset handler */
7221 &my_collation_any_uca_handler, /* collation handler */
7222 PAD_SPACE}; /* pad attribute */
7223
/*
  Descriptor for the MY_UTF8MB4 "_swedish_ci" collation: uses the
  `swedish` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 232 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7224 CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci = {
7225 232,
7226 0,
7227 0, /* number */
7228 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7229 MY_UTF8MB4, /* csname */
7230 MY_UTF8MB4 "_swedish_ci", /* m_coll_name */
7231 "UTF-8 Unicode", /* comment */
7232 swedish, /* tailoring */
7233 nullptr, /* coll_param */
7234 ctype_utf8, /* ctype */
7235 nullptr, /* to_lower */
7236 nullptr, /* to_upper */
7237 nullptr, /* sort_order */
7238 nullptr, /* uca */
7239 nullptr, /* tab_to_uni */
7240 nullptr, /* tab_from_uni */
7241 &my_unicase_default, /* caseinfo */
7242 nullptr, /* state_map */
7243 nullptr, /* ident_map */
7244 8, /* strxfrm_multiply */
7245 1, /* caseup_multiply */
7246 1, /* casedn_multiply */
7247 1, /* mbminlen */
7248 4, /* mbmaxlen */
7249 1, /* mbmaxlenlen */
7250 9, /* min_sort_char */
7251 0xFFFF, /* max_sort_char */
7252 ' ', /* pad char */
7253 false, /* escape_with_backslash_is_dangerous */
7254 1, /* levels_for_compare */
7255 &my_charset_utf8mb4_handler, /* charset handler */
7256 &my_collation_any_uca_handler, /* collation handler */
7257 PAD_SPACE}; /* pad attribute */
7258
/*
  Descriptor for the MY_UTF8MB4 "_turkish_ci" collation: uses the
  `turkish` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  Unlike its utf8mb4 siblings it points at my_unicase_turkish and sets both
  case multipliers to 2.
  NOTE(review): the leading 233 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7259 CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci = {
7260 233,
7261 0,
7262 0, /* number */
7263 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7264 MY_UTF8MB4, /* csname */
7265 MY_UTF8MB4 "_turkish_ci", /* m_coll_name */
7266 "UTF-8 Unicode", /* comment */
7267 turkish, /* tailoring */
7268 nullptr, /* coll_param */
7269 ctype_utf8, /* ctype */
7270 nullptr, /* to_lower */
7271 nullptr, /* to_upper */
7272 nullptr, /* sort_order */
7273 nullptr, /* uca */
7274 nullptr, /* tab_to_uni */
7275 nullptr, /* tab_from_uni */
7276 &my_unicase_turkish, /* caseinfo: Turkish-specific case table */
7277 nullptr, /* state_map */
7278 nullptr, /* ident_map */
7279 8, /* strxfrm_multiply */
7280 2, /* caseup_multiply; NOTE(review): presumably 2 because Turkish case
        mapping can lengthen the encoded text -- confirm */
7281 2, /* casedn_multiply; same assumption as caseup_multiply */
7282 1, /* mbminlen */
7283 4, /* mbmaxlen */
7284 1, /* mbmaxlenlen */
7285 9, /* min_sort_char */
7286 0xFFFF, /* max_sort_char */
7287 ' ', /* pad char */
7288 false, /* escape_with_backslash_is_dangerous */
7289 1, /* levels_for_compare */
7290 &my_charset_utf8mb4_handler, /* charset handler */
7291 &my_collation_any_uca_handler, /* collation handler */
7292 PAD_SPACE}; /* pad attribute */
7293
/*
  Descriptor for the MY_UTF8MB4 "_czech_ci" collation: uses the
  `czech` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 234 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7294 CHARSET_INFO my_charset_utf8mb4_czech_uca_ci = {
7295 234,
7296 0,
7297 0, /* number */
7298 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7299 MY_UTF8MB4, /* csname */
7300 MY_UTF8MB4 "_czech_ci", /* m_coll_name */
7301 "UTF-8 Unicode", /* comment */
7302 czech, /* tailoring */
7303 nullptr, /* coll_param */
7304 ctype_utf8, /* ctype */
7305 nullptr, /* to_lower */
7306 nullptr, /* to_upper */
7307 nullptr, /* sort_order */
7308 nullptr, /* uca */
7309 nullptr, /* tab_to_uni */
7310 nullptr, /* tab_from_uni */
7311 &my_unicase_default, /* caseinfo */
7312 nullptr, /* state_map */
7313 nullptr, /* ident_map */
7314 8, /* strxfrm_multiply */
7315 1, /* caseup_multiply */
7316 1, /* casedn_multiply */
7317 1, /* mbminlen */
7318 4, /* mbmaxlen */
7319 1, /* mbmaxlenlen */
7320 9, /* min_sort_char */
7321 0xFFFF, /* max_sort_char */
7322 ' ', /* pad char */
7323 false, /* escape_with_backslash_is_dangerous */
7324 1, /* levels_for_compare */
7325 &my_charset_utf8mb4_handler, /* charset handler */
7326 &my_collation_any_uca_handler, /* collation handler */
7327 PAD_SPACE}; /* pad attribute */
7328
/*
  Descriptor for the MY_UTF8MB4 "_danish_ci" collation: uses the
  `danish` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 235 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7329 CHARSET_INFO my_charset_utf8mb4_danish_uca_ci = {
7330 235,
7331 0,
7332 0, /* number */
7333 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7334 MY_UTF8MB4, /* csname */
7335 MY_UTF8MB4 "_danish_ci", /* m_coll_name */
7336 "UTF-8 Unicode", /* comment */
7337 danish, /* tailoring */
7338 nullptr, /* coll_param */
7339 ctype_utf8, /* ctype */
7340 nullptr, /* to_lower */
7341 nullptr, /* to_upper */
7342 nullptr, /* sort_order */
7343 nullptr, /* uca */
7344 nullptr, /* tab_to_uni */
7345 nullptr, /* tab_from_uni */
7346 &my_unicase_default, /* caseinfo */
7347 nullptr, /* state_map */
7348 nullptr, /* ident_map */
7349 8, /* strxfrm_multiply */
7350 1, /* caseup_multiply */
7351 1, /* casedn_multiply */
7352 1, /* mbminlen */
7353 4, /* mbmaxlen */
7354 1, /* mbmaxlenlen */
7355 9, /* min_sort_char */
7356 0xFFFF, /* max_sort_char */
7357 ' ', /* pad char */
7358 false, /* escape_with_backslash_is_dangerous */
7359 1, /* levels_for_compare */
7360 &my_charset_utf8mb4_handler, /* charset handler */
7361 &my_collation_any_uca_handler, /* collation handler */
7362 PAD_SPACE}; /* pad attribute */
7363
/*
  Descriptor for the MY_UTF8MB4 "_lithuanian_ci" collation: uses the
  `lithuanian` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 236 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7364 CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci = {
7365 236,
7366 0,
7367 0, /* number */
7368 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7369 MY_UTF8MB4, /* csname */
7370 MY_UTF8MB4 "_lithuanian_ci", /* m_coll_name */
7371 "UTF-8 Unicode", /* comment */
7372 lithuanian, /* tailoring */
7373 nullptr, /* coll_param */
7374 ctype_utf8, /* ctype */
7375 nullptr, /* to_lower */
7376 nullptr, /* to_upper */
7377 nullptr, /* sort_order */
7378 nullptr, /* uca */
7379 nullptr, /* tab_to_uni */
7380 nullptr, /* tab_from_uni */
7381 &my_unicase_default, /* caseinfo */
7382 nullptr, /* state_map */
7383 nullptr, /* ident_map */
7384 8, /* strxfrm_multiply */
7385 1, /* caseup_multiply */
7386 1, /* casedn_multiply */
7387 1, /* mbminlen */
7388 4, /* mbmaxlen */
7389 1, /* mbmaxlenlen */
7390 9, /* min_sort_char */
7391 0xFFFF, /* max_sort_char */
7392 ' ', /* pad char */
7393 false, /* escape_with_backslash_is_dangerous */
7394 1, /* levels_for_compare */
7395 &my_charset_utf8mb4_handler, /* charset handler */
7396 &my_collation_any_uca_handler, /* collation handler */
7397 PAD_SPACE}; /* pad attribute */
7398
/*
  Descriptor for the MY_UTF8MB4 "_slovak_ci" collation: uses the
  `slovak` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 237 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7399 CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci = {
7400 237,
7401 0,
7402 0, /* number */
7403 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7404 MY_UTF8MB4, /* csname */
7405 MY_UTF8MB4 "_slovak_ci", /* m_coll_name */
7406 "UTF-8 Unicode", /* comment */
7407 slovak, /* tailoring */
7408 nullptr, /* coll_param */
7409 ctype_utf8, /* ctype */
7410 nullptr, /* to_lower */
7411 nullptr, /* to_upper */
7412 nullptr, /* sort_order */
7413 nullptr, /* uca */
7414 nullptr, /* tab_to_uni */
7415 nullptr, /* tab_from_uni */
7416 &my_unicase_default, /* caseinfo */
7417 nullptr, /* state_map */
7418 nullptr, /* ident_map */
7419 8, /* strxfrm_multiply */
7420 1, /* caseup_multiply */
7421 1, /* casedn_multiply */
7422 1, /* mbminlen */
7423 4, /* mbmaxlen */
7424 1, /* mbmaxlenlen */
7425 9, /* min_sort_char */
7426 0xFFFF, /* max_sort_char */
7427 ' ', /* pad char */
7428 false, /* escape_with_backslash_is_dangerous */
7429 1, /* levels_for_compare */
7430 &my_charset_utf8mb4_handler, /* charset handler */
7431 &my_collation_any_uca_handler, /* collation handler */
7432 PAD_SPACE}; /* pad attribute */
7433
/*
  Descriptor for the MY_UTF8MB4 "_spanish2_ci" collation: uses the
  `spanish2` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 238 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7434 CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci = {
7435 238,
7436 0,
7437 0, /* number */
7438 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7439 MY_UTF8MB4, /* csname */
7440 MY_UTF8MB4 "_spanish2_ci", /* m_coll_name */
7441 "UTF-8 Unicode", /* comment */
7442 spanish2, /* tailoring */
7443 nullptr, /* coll_param */
7444 ctype_utf8, /* ctype */
7445 nullptr, /* to_lower */
7446 nullptr, /* to_upper */
7447 nullptr, /* sort_order */
7448 nullptr, /* uca */
7449 nullptr, /* tab_to_uni */
7450 nullptr, /* tab_from_uni */
7451 &my_unicase_default, /* caseinfo */
7452 nullptr, /* state_map */
7453 nullptr, /* ident_map */
7454 8, /* strxfrm_multiply */
7455 1, /* caseup_multiply */
7456 1, /* casedn_multiply */
7457 1, /* mbminlen */
7458 4, /* mbmaxlen */
7459 1, /* mbmaxlenlen */
7460 9, /* min_sort_char */
7461 0xFFFF, /* max_sort_char */
7462 ' ', /* pad char */
7463 false, /* escape_with_backslash_is_dangerous */
7464 1, /* levels_for_compare */
7465 &my_charset_utf8mb4_handler, /* charset handler */
7466 &my_collation_any_uca_handler, /* collation handler */
7467 PAD_SPACE}; /* pad attribute */
7468
/*
  Descriptor for the MY_UTF8MB4 "_roman_ci" collation: uses the
  `roman` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 239 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7469 CHARSET_INFO my_charset_utf8mb4_roman_uca_ci = {
7470 239,
7471 0,
7472 0, /* number */
7473 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7474 MY_UTF8MB4, /* csname */
7475 MY_UTF8MB4 "_roman_ci", /* m_coll_name */
7476 "UTF-8 Unicode", /* comment */
7477 roman, /* tailoring */
7478 nullptr, /* coll_param */
7479 ctype_utf8, /* ctype */
7480 nullptr, /* to_lower */
7481 nullptr, /* to_upper */
7482 nullptr, /* sort_order */
7483 nullptr, /* uca */
7484 nullptr, /* tab_to_uni */
7485 nullptr, /* tab_from_uni */
7486 &my_unicase_default, /* caseinfo */
7487 nullptr, /* state_map */
7488 nullptr, /* ident_map */
7489 8, /* strxfrm_multiply */
7490 1, /* caseup_multiply */
7491 1, /* casedn_multiply */
7492 1, /* mbminlen */
7493 4, /* mbmaxlen */
7494 1, /* mbmaxlenlen */
7495 9, /* min_sort_char */
7496 0xFFFF, /* max_sort_char */
7497 ' ', /* pad char */
7498 false, /* escape_with_backslash_is_dangerous */
7499 1, /* levels_for_compare */
7500 &my_charset_utf8mb4_handler, /* charset handler */
7501 &my_collation_any_uca_handler, /* collation handler */
7502 PAD_SPACE}; /* pad attribute */
7503
/*
  Descriptor for the MY_UTF8MB4 "_persian_ci" collation: uses the
  `persian` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 240 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7504 CHARSET_INFO my_charset_utf8mb4_persian_uca_ci = {
7505 240,
7506 0,
7507 0, /* number */
7508 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7509 MY_UTF8MB4, /* csname */
7510 MY_UTF8MB4 "_persian_ci", /* m_coll_name */
7511 "UTF-8 Unicode", /* comment */
7512 persian, /* tailoring */
7513 nullptr, /* coll_param */
7514 ctype_utf8, /* ctype */
7515 nullptr, /* to_lower */
7516 nullptr, /* to_upper */
7517 nullptr, /* sort_order */
7518 nullptr, /* uca */
7519 nullptr, /* tab_to_uni */
7520 nullptr, /* tab_from_uni */
7521 &my_unicase_default, /* caseinfo */
7522 nullptr, /* state_map */
7523 nullptr, /* ident_map */
7524 8, /* strxfrm_multiply */
7525 1, /* caseup_multiply */
7526 1, /* casedn_multiply */
7527 1, /* mbminlen */
7528 4, /* mbmaxlen */
7529 1, /* mbmaxlenlen */
7530 9, /* min_sort_char */
7531 0xFFFF, /* max_sort_char */
7532 ' ', /* pad char */
7533 false, /* escape_with_backslash_is_dangerous */
7534 1, /* levels_for_compare */
7535 &my_charset_utf8mb4_handler, /* charset handler */
7536 &my_collation_any_uca_handler, /* collation handler */
7537 PAD_SPACE}; /* pad attribute */
7538
/*
  Descriptor for the MY_UTF8MB4 "_esperanto_ci" collation: uses the
  `esperanto` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 241 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7539 CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci = {
7540 241,
7541 0,
7542 0, /* number */
7543 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7544 MY_UTF8MB4, /* csname */
7545 MY_UTF8MB4 "_esperanto_ci", /* m_coll_name */
7546 "UTF-8 Unicode", /* comment */
7547 esperanto, /* tailoring */
7548 nullptr, /* coll_param */
7549 ctype_utf8, /* ctype */
7550 nullptr, /* to_lower */
7551 nullptr, /* to_upper */
7552 nullptr, /* sort_order */
7553 nullptr, /* uca */
7554 nullptr, /* tab_to_uni */
7555 nullptr, /* tab_from_uni */
7556 &my_unicase_default, /* caseinfo */
7557 nullptr, /* state_map */
7558 nullptr, /* ident_map */
7559 8, /* strxfrm_multiply */
7560 1, /* caseup_multiply */
7561 1, /* casedn_multiply */
7562 1, /* mbminlen */
7563 4, /* mbmaxlen */
7564 1, /* mbmaxlenlen */
7565 9, /* min_sort_char */
7566 0xFFFF, /* max_sort_char */
7567 ' ', /* pad char */
7568 false, /* escape_with_backslash_is_dangerous */
7569 1, /* levels_for_compare */
7570 &my_charset_utf8mb4_handler, /* charset handler */
7571 &my_collation_any_uca_handler, /* collation handler */
7572 PAD_SPACE}; /* pad attribute */
7573
/*
  Descriptor for the MY_UTF8MB4 "_hungarian_ci" collation: uses the
  `hungarian` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 242 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7574 CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci = {
7575 242,
7576 0,
7577 0, /* number */
7578 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7579 MY_UTF8MB4, /* csname */
7580 MY_UTF8MB4 "_hungarian_ci", /* m_coll_name */
7581 "UTF-8 Unicode", /* comment */
7582 hungarian, /* tailoring */
7583 nullptr, /* coll_param */
7584 ctype_utf8, /* ctype */
7585 nullptr, /* to_lower */
7586 nullptr, /* to_upper */
7587 nullptr, /* sort_order */
7588 nullptr, /* uca */
7589 nullptr, /* tab_to_uni */
7590 nullptr, /* tab_from_uni */
7591 &my_unicase_default, /* caseinfo */
7592 nullptr, /* state_map */
7593 nullptr, /* ident_map */
7594 8, /* strxfrm_multiply */
7595 1, /* caseup_multiply */
7596 1, /* casedn_multiply */
7597 1, /* mbminlen */
7598 4, /* mbmaxlen */
7599 1, /* mbmaxlenlen */
7600 9, /* min_sort_char */
7601 0xFFFF, /* max_sort_char */
7602 ' ', /* pad char */
7603 false, /* escape_with_backslash_is_dangerous */
7604 1, /* levels_for_compare */
7605 &my_charset_utf8mb4_handler, /* charset handler */
7606 &my_collation_any_uca_handler, /* collation handler */
7607 PAD_SPACE}; /* pad attribute */
7608
/*
  Descriptor for the MY_UTF8MB4 "_sinhala_ci" collation: uses the
  `sinhala` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 243 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7609 CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci = {
7610 243,
7611 0,
7612 0, /* number */
7613 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7614 MY_UTF8MB4, /* csname */
7615 MY_UTF8MB4 "_sinhala_ci", /* m_coll_name */
7616 "UTF-8 Unicode", /* comment */
7617 sinhala, /* tailoring */
7618 nullptr, /* coll_param */
7619 ctype_utf8, /* ctype */
7620 nullptr, /* to_lower */
7621 nullptr, /* to_upper */
7622 nullptr, /* sort_order */
7623 nullptr, /* uca */
7624 nullptr, /* tab_to_uni */
7625 nullptr, /* tab_from_uni */
7626 &my_unicase_default, /* caseinfo */
7627 nullptr, /* state_map */
7628 nullptr, /* ident_map */
7629 8, /* strxfrm_multiply */
7630 1, /* caseup_multiply */
7631 1, /* casedn_multiply */
7632 1, /* mbminlen */
7633 4, /* mbmaxlen */
7634 1, /* mbmaxlenlen */
7635 9, /* min_sort_char */
7636 0xFFFF, /* max_sort_char */
7637 ' ', /* pad char */
7638 false, /* escape_with_backslash_is_dangerous */
7639 1, /* levels_for_compare */
7640 &my_charset_utf8mb4_handler, /* charset handler */
7641 &my_collation_any_uca_handler, /* collation handler */
7642 PAD_SPACE}; /* pad attribute */
7643
/*
  Descriptor for the MY_UTF8MB4 "_german2_ci" collation: uses the
  `german2` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 244 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7644 CHARSET_INFO my_charset_utf8mb4_german2_uca_ci = {
7645 244,
7646 0,
7647 0, /* number */
7648 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7649 MY_UTF8MB4, /* csname */
7650 MY_UTF8MB4 "_german2_ci", /* m_coll_name */
7651 "UTF-8 Unicode", /* comment */
7652 german2, /* tailoring */
7653 nullptr, /* coll_param */
7654 ctype_utf8, /* ctype */
7655 nullptr, /* to_lower */
7656 nullptr, /* to_upper */
7657 nullptr, /* sort_order */
7658 nullptr, /* uca */
7659 nullptr, /* tab_to_uni */
7660 nullptr, /* tab_from_uni */
7661 &my_unicase_default, /* caseinfo */
7662 nullptr, /* state_map */
7663 nullptr, /* ident_map */
7664 8, /* strxfrm_multiply */
7665 1, /* caseup_multiply */
7666 1, /* casedn_multiply */
7667 1, /* mbminlen */
7668 4, /* mbmaxlen */
7669 1, /* mbmaxlenlen */
7670 9, /* min_sort_char */
7671 0xFFFF, /* max_sort_char */
7672 ' ', /* pad char */
7673 false, /* escape_with_backslash_is_dangerous */
7674 1, /* levels_for_compare */
7675 &my_charset_utf8mb4_handler, /* charset handler */
7676 &my_collation_any_uca_handler, /* collation handler */
7677 PAD_SPACE}; /* pad attribute */
7678
/*
  Descriptor for the MY_UTF8MB4 "_croatian_ci" collation: uses the
  `croatian` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 245 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7679 CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci = {
7680 245,
7681 0,
7682 0, /* number */
7683 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7684 MY_UTF8MB4, /* csname */
7685 MY_UTF8MB4 "_croatian_ci", /* m_coll_name */
7686 "UTF-8 Unicode", /* comment */
7687 croatian, /* tailoring */
7688 nullptr, /* coll_param */
7689 ctype_utf8, /* ctype */
7690 nullptr, /* to_lower */
7691 nullptr, /* to_upper */
7692 nullptr, /* sort_order */
7693 nullptr, /* uca */
7694 nullptr, /* tab_to_uni */
7695 nullptr, /* tab_from_uni */
7696 &my_unicase_default, /* caseinfo */
7697 nullptr, /* state_map */
7698 nullptr, /* ident_map */
7699 8, /* strxfrm_multiply */
7700 1, /* caseup_multiply */
7701 1, /* casedn_multiply */
7702 1, /* mbminlen */
7703 4, /* mbmaxlen */
7704 1, /* mbmaxlenlen */
7705 9, /* min_sort_char */
7706 0xFFFF, /* max_sort_char */
7707 ' ', /* pad char */
7708 false, /* escape_with_backslash_is_dangerous */
7709 1, /* levels_for_compare */
7710 &my_charset_utf8mb4_handler, /* charset handler */
7711 &my_collation_any_uca_handler, /* collation handler */
7712 PAD_SPACE}; /* pad attribute */
7713
/*
  Descriptor for the MY_UTF8MB4 "_unicode_520_ci" collation: empty
  tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  Differs from the neighbouring utf8mb4 descriptors in three fields: it
  points `uca` at my_uca_v520 instead of nullptr, uses
  my_unicase_unicode520 as caseinfo, and sets max_sort_char to 0x10FFFF.
  NOTE(review): the leading 246 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7714 CHARSET_INFO my_charset_utf8mb4_unicode_520_ci = {
7715 246,
7716 0,
7717 0, /* number */
7718 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7719 MY_UTF8MB4, /* csname */
7720 MY_UTF8MB4 "_unicode_520_ci", /* m_coll_name */
7721 "UTF-8 Unicode", /* comment */
7722 "", /* tailoring */
7723 nullptr, /* coll_param */
7724 ctype_utf8, /* ctype */
7725 nullptr, /* to_lower */
7726 nullptr, /* to_upper */
7727 nullptr, /* sort_order */
7728 &my_uca_v520, /* uca */
7729 nullptr, /* tab_to_uni */
7730 nullptr, /* tab_from_uni */
7731 &my_unicase_unicode520, /* caseinfo */
7732 nullptr, /* state_map */
7733 nullptr, /* ident_map */
7734 8, /* strxfrm_multiply */
7735 1, /* caseup_multiply */
7736 1, /* casedn_multiply */
7737 1, /* mbminlen */
7738 4, /* mbmaxlen */
7739 1, /* mbmaxlenlen */
7740 9, /* min_sort_char */
7741 0x10FFFF, /* max_sort_char */
7742 ' ', /* pad char */
7743 false, /* escape_with_backslash_is_dangerous */
7744 1, /* levels_for_compare */
7745 &my_charset_utf8mb4_handler, /* charset handler */
7746 &my_collation_any_uca_handler, /* collation handler */
7747 PAD_SPACE}; /* pad attribute */
7748
/*
  Descriptor for the MY_UTF8MB4 "_vietnamese_ci" collation: uses the
  `vietnamese` tailoring, mbminlen 1 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 247 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7749 CHARSET_INFO my_charset_utf8mb4_vietnamese_ci = {
7750 247,
7751 0,
7752 0, /* number */
7753 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
7754 MY_UTF8MB4, /* csname */
7755 MY_UTF8MB4 "_vietnamese_ci", /* m_coll_name */
7756 "UTF-8 Unicode", /* comment */
7757 vietnamese, /* tailoring */
7758 nullptr, /* coll_param */
7759 ctype_utf8, /* ctype */
7760 nullptr, /* to_lower */
7761 nullptr, /* to_upper */
7762 nullptr, /* sort_order */
7763 nullptr, /* uca */
7764 nullptr, /* tab_to_uni */
7765 nullptr, /* tab_from_uni */
7766 &my_unicase_default, /* caseinfo */
7767 nullptr, /* state_map */
7768 nullptr, /* ident_map */
7769 8, /* strxfrm_multiply */
7770 1, /* caseup_multiply */
7771 1, /* casedn_multiply */
7772 1, /* mbminlen */
7773 4, /* mbmaxlen */
7774 1, /* mbmaxlenlen */
7775 9, /* min_sort_char */
7776 0xFFFF, /* max_sort_char */
7777 ' ', /* pad char */
7778 false, /* escape_with_backslash_is_dangerous */
7779 1, /* levels_for_compare */
7780 &my_charset_utf8mb4_handler, /* charset handler */
7781 &my_collation_any_uca_handler, /* collation handler */
7782 PAD_SPACE}; /* pad attribute */
7783
/*
  Collation handler table referenced by the utf32 UCA CHARSET_INFO
  descriptors in this file.
  NOTE(review): the slot labels below (other than the pre-existing "init")
  are derived from the names of the functions stored in each slot; confirm
  them against the MY_COLLATION_HANDLER declaration.
*/
7784 MY_COLLATION_HANDLER my_collation_utf32_uca_handler = {
7785 my_coll_init_uca, /* init */
7786 my_coll_uninit_uca, /* uninit */
7787 my_strnncoll_any_uca, /* strnncoll */
7788 my_strnncollsp_any_uca, /* strnncollsp */
7789 my_strnxfrm_any_uca, /* strnxfrm */
7790 my_strnxfrmlen_simple, /* strnxfrmlen */
7791 my_like_range_generic, /* like_range */
7792 my_wildcmp_uca, /* wildcmp */
7793 nullptr, /* slot left unset; NOTE(review): confirm which member this is */
7794 my_instr_mb, /* instr */
7795 my_hash_sort_any_uca, /* hash_sort */
7796 my_propagate_complex}; /* propagate */
7797
/*
  Declaration only: the utf32 charset handler is referenced by address from
  the utf32 CHARSET_INFO descriptors in this file; its definition is not in
  this part of the file.
*/
7798 extern MY_CHARSET_HANDLER my_charset_utf32_handler;
7799
/*
  Value used for the `state` field of the utf32 UCA collation descriptors in
  this file: the bitwise OR of MY_CS_COMPILED, MY_CS_STRNXFRM,
  MY_CS_UNICODE, MY_CS_UNICODE_SUPPLEMENT and MY_CS_NONASCII.
*/
7800 #define MY_CS_UTF32_UCA_FLAGS \
7801 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | \
7802 MY_CS_UNICODE_SUPPLEMENT | MY_CS_NONASCII)
7803
/*
  Descriptor for the "utf32_unicode_ci" collation: empty tailoring,
  mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 160 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7804 CHARSET_INFO my_charset_utf32_unicode_ci = {
7805 160,
7806 0,
7807 0, /* number */
7808 MY_CS_UTF32_UCA_FLAGS, /* state */
7809 "utf32", /* csname */
7810 "utf32_unicode_ci", /* m_coll_name */
7811 "", /* comment */
7812 "", /* tailoring */
7813 nullptr, /* coll_param */
7814 nullptr, /* ctype */
7815 nullptr, /* to_lower */
7816 nullptr, /* to_upper */
7817 nullptr, /* sort_order */
7818 nullptr, /* uca */
7819 nullptr, /* tab_to_uni */
7820 nullptr, /* tab_from_uni */
7821 &my_unicase_default, /* caseinfo */
7822 nullptr, /* state_map */
7823 nullptr, /* ident_map */
7824 8, /* strxfrm_multiply */
7825 1, /* caseup_multiply */
7826 1, /* casedn_multiply */
7827 4, /* mbminlen */
7828 4, /* mbmaxlen */
7829 1, /* mbmaxlenlen */
7830 9, /* min_sort_char */
7831 0xFFFF, /* max_sort_char */
7832 ' ', /* pad char */
7833 false, /* escape_with_backslash_is_dangerous */
7834 1, /* levels_for_compare */
7835 &my_charset_utf32_handler, /* charset handler */
7836 &my_collation_utf32_uca_handler, /* collation handler */
7837 PAD_SPACE}; /* pad attribute */
7838
/*
  Descriptor for the "utf32_icelandic_ci" collation: uses the `icelandic`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 161 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7839 CHARSET_INFO my_charset_utf32_icelandic_uca_ci = {
7840 161,
7841 0,
7842 0, /* number */
7843 MY_CS_UTF32_UCA_FLAGS, /* state */
7844 "utf32", /* csname */
7845 "utf32_icelandic_ci", /* m_coll_name */
7846 "", /* comment */
7847 icelandic, /* tailoring */
7848 nullptr, /* coll_param */
7849 nullptr, /* ctype */
7850 nullptr, /* to_lower */
7851 nullptr, /* to_upper */
7852 nullptr, /* sort_order */
7853 nullptr, /* uca */
7854 nullptr, /* tab_to_uni */
7855 nullptr, /* tab_from_uni */
7856 &my_unicase_default, /* caseinfo */
7857 nullptr, /* state_map */
7858 nullptr, /* ident_map */
7859 8, /* strxfrm_multiply */
7860 1, /* caseup_multiply */
7861 1, /* casedn_multiply */
7862 4, /* mbminlen */
7863 4, /* mbmaxlen */
7864 1, /* mbmaxlenlen */
7865 9, /* min_sort_char */
7866 0xFFFF, /* max_sort_char */
7867 ' ', /* pad char */
7868 false, /* escape_with_backslash_is_dangerous */
7869 1, /* levels_for_compare */
7870 &my_charset_utf32_handler, /* charset handler */
7871 &my_collation_utf32_uca_handler, /* collation handler */
7872 PAD_SPACE}; /* pad attribute */
7873
/*
  Descriptor for the "utf32_latvian_ci" collation: uses the `latvian`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 162 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7874 CHARSET_INFO my_charset_utf32_latvian_uca_ci = {
7875 162,
7876 0,
7877 0, /* number */
7878 MY_CS_UTF32_UCA_FLAGS, /* state */
7879 "utf32", /* csname */
7880 "utf32_latvian_ci", /* m_coll_name */
7881 "", /* comment */
7882 latvian, /* tailoring */
7883 nullptr, /* coll_param */
7884 nullptr, /* ctype */
7885 nullptr, /* to_lower */
7886 nullptr, /* to_upper */
7887 nullptr, /* sort_order */
7888 nullptr, /* uca */
7889 nullptr, /* tab_to_uni */
7890 nullptr, /* tab_from_uni */
7891 &my_unicase_default, /* caseinfo */
7892 nullptr, /* state_map */
7893 nullptr, /* ident_map */
7894 8, /* strxfrm_multiply */
7895 1, /* caseup_multiply */
7896 1, /* casedn_multiply */
7897 4, /* mbminlen */
7898 4, /* mbmaxlen */
7899 1, /* mbmaxlenlen */
7900 9, /* min_sort_char */
7901 0xFFFF, /* max_sort_char */
7902 ' ', /* pad char */
7903 false, /* escape_with_backslash_is_dangerous */
7904 1, /* levels_for_compare */
7905 &my_charset_utf32_handler, /* charset handler */
7906 &my_collation_utf32_uca_handler, /* collation handler */
7907 PAD_SPACE}; /* pad attribute */
7908
/*
  Descriptor for the "utf32_romanian_ci" collation: uses the `romanian`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 163 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7909 CHARSET_INFO my_charset_utf32_romanian_uca_ci = {
7910 163,
7911 0,
7912 0, /* number */
7913 MY_CS_UTF32_UCA_FLAGS, /* state */
7914 "utf32", /* csname */
7915 "utf32_romanian_ci", /* m_coll_name */
7916 "", /* comment */
7917 romanian, /* tailoring */
7918 nullptr, /* coll_param */
7919 nullptr, /* ctype */
7920 nullptr, /* to_lower */
7921 nullptr, /* to_upper */
7922 nullptr, /* sort_order */
7923 nullptr, /* uca */
7924 nullptr, /* tab_to_uni */
7925 nullptr, /* tab_from_uni */
7926 &my_unicase_default, /* caseinfo */
7927 nullptr, /* state_map */
7928 nullptr, /* ident_map */
7929 8, /* strxfrm_multiply */
7930 1, /* caseup_multiply */
7931 1, /* casedn_multiply */
7932 4, /* mbminlen */
7933 4, /* mbmaxlen */
7934 1, /* mbmaxlenlen */
7935 9, /* min_sort_char */
7936 0xFFFF, /* max_sort_char */
7937 ' ', /* pad char */
7938 false, /* escape_with_backslash_is_dangerous */
7939 1, /* levels_for_compare */
7940 &my_charset_utf32_handler, /* charset handler */
7941 &my_collation_utf32_uca_handler, /* collation handler */
7942 PAD_SPACE}; /* pad attribute */
7943
/*
  Descriptor for the "utf32_slovenian_ci" collation: uses the `slovenian`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 164 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7944 CHARSET_INFO my_charset_utf32_slovenian_uca_ci = {
7945 164,
7946 0,
7947 0, /* number */
7948 MY_CS_UTF32_UCA_FLAGS, /* state */
7949 "utf32", /* csname */
7950 "utf32_slovenian_ci", /* m_coll_name */
7951 "", /* comment */
7952 slovenian, /* tailoring */
7953 nullptr, /* coll_param */
7954 nullptr, /* ctype */
7955 nullptr, /* to_lower */
7956 nullptr, /* to_upper */
7957 nullptr, /* sort_order */
7958 nullptr, /* uca */
7959 nullptr, /* tab_to_uni */
7960 nullptr, /* tab_from_uni */
7961 &my_unicase_default, /* caseinfo */
7962 nullptr, /* state_map */
7963 nullptr, /* ident_map */
7964 8, /* strxfrm_multiply */
7965 1, /* caseup_multiply */
7966 1, /* casedn_multiply */
7967 4, /* mbminlen */
7968 4, /* mbmaxlen */
7969 1, /* mbmaxlenlen */
7970 9, /* min_sort_char */
7971 0xFFFF, /* max_sort_char */
7972 ' ', /* pad char */
7973 false, /* escape_with_backslash_is_dangerous */
7974 1, /* levels_for_compare */
7975 &my_charset_utf32_handler, /* charset handler */
7976 &my_collation_utf32_uca_handler, /* collation handler */
7977 PAD_SPACE}; /* pad attribute */
7978
/*
  Descriptor for the "utf32_polish_ci" collation: uses the `polish`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 165 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
7979 CHARSET_INFO my_charset_utf32_polish_uca_ci = {
7980 165,
7981 0,
7982 0, /* number */
7983 MY_CS_UTF32_UCA_FLAGS, /* state */
7984 "utf32", /* csname */
7985 "utf32_polish_ci", /* m_coll_name */
7986 "", /* comment */
7987 polish, /* tailoring */
7988 nullptr, /* coll_param */
7989 nullptr, /* ctype */
7990 nullptr, /* to_lower */
7991 nullptr, /* to_upper */
7992 nullptr, /* sort_order */
7993 nullptr, /* uca */
7994 nullptr, /* tab_to_uni */
7995 nullptr, /* tab_from_uni */
7996 &my_unicase_default, /* caseinfo */
7997 nullptr, /* state_map */
7998 nullptr, /* ident_map */
7999 8, /* strxfrm_multiply */
8000 1, /* caseup_multiply */
8001 1, /* casedn_multiply */
8002 4, /* mbminlen */
8003 4, /* mbmaxlen */
8004 1, /* mbmaxlenlen */
8005 9, /* min_sort_char */
8006 0xFFFF, /* max_sort_char */
8007 ' ', /* pad char */
8008 false, /* escape_with_backslash_is_dangerous */
8009 1, /* levels_for_compare */
8010 &my_charset_utf32_handler, /* charset handler */
8011 &my_collation_utf32_uca_handler, /* collation handler */
8012 PAD_SPACE}; /* pad attribute */
8013
/*
  Descriptor for the "utf32_estonian_ci" collation: uses the `estonian`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 166 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
8014 CHARSET_INFO my_charset_utf32_estonian_uca_ci = {
8015 166,
8016 0,
8017 0, /* number */
8018 MY_CS_UTF32_UCA_FLAGS, /* state */
8019 "utf32", /* csname */
8020 "utf32_estonian_ci", /* m_coll_name */
8021 "", /* comment */
8022 estonian, /* tailoring */
8023 nullptr, /* coll_param */
8024 nullptr, /* ctype */
8025 nullptr, /* to_lower */
8026 nullptr, /* to_upper */
8027 nullptr, /* sort_order */
8028 nullptr, /* uca */
8029 nullptr, /* tab_to_uni */
8030 nullptr, /* tab_from_uni */
8031 &my_unicase_default, /* caseinfo */
8032 nullptr, /* state_map */
8033 nullptr, /* ident_map */
8034 8, /* strxfrm_multiply */
8035 1, /* caseup_multiply */
8036 1, /* casedn_multiply */
8037 4, /* mbminlen */
8038 4, /* mbmaxlen */
8039 1, /* mbmaxlenlen */
8040 9, /* min_sort_char */
8041 0xFFFF, /* max_sort_char */
8042 ' ', /* pad char */
8043 false, /* escape_with_backslash_is_dangerous */
8044 1, /* levels_for_compare */
8045 &my_charset_utf32_handler, /* charset handler */
8046 &my_collation_utf32_uca_handler, /* collation handler */
8047 PAD_SPACE}; /* pad attribute */
8048
/*
  Descriptor for the "utf32_spanish_ci" collation: uses the `spanish`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 167 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
8049 CHARSET_INFO my_charset_utf32_spanish_uca_ci = {
8050 167,
8051 0,
8052 0, /* number */
8053 MY_CS_UTF32_UCA_FLAGS, /* state */
8054 "utf32", /* csname */
8055 "utf32_spanish_ci", /* m_coll_name */
8056 "", /* comment */
8057 spanish, /* tailoring */
8058 nullptr, /* coll_param */
8059 nullptr, /* ctype */
8060 nullptr, /* to_lower */
8061 nullptr, /* to_upper */
8062 nullptr, /* sort_order */
8063 nullptr, /* uca */
8064 nullptr, /* tab_to_uni */
8065 nullptr, /* tab_from_uni */
8066 &my_unicase_default, /* caseinfo */
8067 nullptr, /* state_map */
8068 nullptr, /* ident_map */
8069 8, /* strxfrm_multiply */
8070 1, /* caseup_multiply */
8071 1, /* casedn_multiply */
8072 4, /* mbminlen */
8073 4, /* mbmaxlen */
8074 1, /* mbmaxlenlen */
8075 9, /* min_sort_char */
8076 0xFFFF, /* max_sort_char */
8077 ' ', /* pad char */
8078 false, /* escape_with_backslash_is_dangerous */
8079 1, /* levels_for_compare */
8080 &my_charset_utf32_handler, /* charset handler */
8081 &my_collation_utf32_uca_handler, /* collation handler */
8082 PAD_SPACE}; /* pad attribute */
8083
/*
  Descriptor for the "utf32_swedish_ci" collation: uses the `swedish`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 168 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
8084 CHARSET_INFO my_charset_utf32_swedish_uca_ci = {
8085 168,
8086 0,
8087 0, /* number */
8088 MY_CS_UTF32_UCA_FLAGS, /* state */
8089 "utf32", /* csname */
8090 "utf32_swedish_ci", /* m_coll_name */
8091 "", /* comment */
8092 swedish, /* tailoring */
8093 nullptr, /* coll_param */
8094 nullptr, /* ctype */
8095 nullptr, /* to_lower */
8096 nullptr, /* to_upper */
8097 nullptr, /* sort_order */
8098 nullptr, /* uca */
8099 nullptr, /* tab_to_uni */
8100 nullptr, /* tab_from_uni */
8101 &my_unicase_default, /* caseinfo */
8102 nullptr, /* state_map */
8103 nullptr, /* ident_map */
8104 8, /* strxfrm_multiply */
8105 1, /* caseup_multiply */
8106 1, /* casedn_multiply */
8107 4, /* mbminlen */
8108 4, /* mbmaxlen */
8109 1, /* mbmaxlenlen */
8110 9, /* min_sort_char */
8111 0xFFFF, /* max_sort_char */
8112 ' ', /* pad char */
8113 false, /* escape_with_backslash_is_dangerous */
8114 1, /* levels_for_compare */
8115 &my_charset_utf32_handler, /* charset handler */
8116 &my_collation_utf32_uca_handler, /* collation handler */
8117 PAD_SPACE}; /* pad attribute */
8118
/*
  Descriptor for the "utf32_turkish_ci" collation: uses the `turkish`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  Points at my_unicase_turkish for case information; unlike the utf8mb4
  Turkish descriptor, both case multipliers stay at 1 here.
  NOTE(review): the leading 169 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
8119 CHARSET_INFO my_charset_utf32_turkish_uca_ci = {
8120 169,
8121 0,
8122 0, /* number */
8123 MY_CS_UTF32_UCA_FLAGS, /* state */
8124 "utf32", /* csname */
8125 "utf32_turkish_ci", /* m_coll_name */
8126 "", /* comment */
8127 turkish, /* tailoring */
8128 nullptr, /* coll_param */
8129 nullptr, /* ctype */
8130 nullptr, /* to_lower */
8131 nullptr, /* to_upper */
8132 nullptr, /* sort_order */
8133 nullptr, /* uca */
8134 nullptr, /* tab_to_uni */
8135 nullptr, /* tab_from_uni */
8136 &my_unicase_turkish, /* caseinfo: Turkish-specific case table */
8137 nullptr, /* state_map */
8138 nullptr, /* ident_map */
8139 8, /* strxfrm_multiply */
8140 1, /* caseup_multiply */
8141 1, /* casedn_multiply */
8142 4, /* mbminlen */
8143 4, /* mbmaxlen */
8144 1, /* mbmaxlenlen */
8145 9, /* min_sort_char */
8146 0xFFFF, /* max_sort_char */
8147 ' ', /* pad char */
8148 false, /* escape_with_backslash_is_dangerous */
8149 1, /* levels_for_compare */
8150 &my_charset_utf32_handler, /* charset handler */
8151 &my_collation_utf32_uca_handler, /* collation handler */
8152 PAD_SPACE}; /* pad attribute */
8153
/*
  Descriptor for the "utf32_czech_ci" collation: uses the `czech`
  tailoring, mbminlen 4 / mbmaxlen 4, PAD_SPACE.
  NOTE(review): the leading 170 is presumably the collation ID; confirm
  against the CHARSET_INFO declaration.
*/
8154 CHARSET_INFO my_charset_utf32_czech_uca_ci = {
8155 170,
8156 0,
8157 0, /* number */
8158 MY_CS_UTF32_UCA_FLAGS, /* state */
8159 "utf32", /* csname */
8160 "utf32_czech_ci", /* m_coll_name */
8161 "", /* comment */
8162 czech, /* tailoring */
8163 nullptr, /* coll_param */
8164 nullptr, /* ctype */
8165 nullptr, /* to_lower */
8166 nullptr, /* to_upper */
8167 nullptr, /* sort_order */
8168 nullptr, /* uca */
8169 nullptr, /* tab_to_uni */
8170 nullptr, /* tab_from_uni */
8171 &my_unicase_default, /* caseinfo */
8172 nullptr, /* state_map */
8173 nullptr, /* ident_map */
8174 8, /* strxfrm_multiply */
8175 1, /* caseup_multiply */
8176 1, /* casedn_multiply */
8177 4, /* mbminlen */
8178 4, /* mbmaxlen */
8179 1, /* mbmaxlenlen */
8180 9, /* min_sort_char */
8181 0xFFFF, /* max_sort_char */
8182 ' ', /* pad char */
8183 false, /* escape_with_backslash_is_dangerous */
8184 1, /* levels_for_compare */
8185 &my_charset_utf32_handler, /* charset handler */
8186 &my_collation_utf32_uca_handler, /* collation handler */
8187 PAD_SPACE}; /* pad attribute */
8188
/*
  CHARSET_INFO entry for the "utf32_danish_ci" collation of character set
  "utf32": `danish` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 171 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8189 CHARSET_INFO my_charset_utf32_danish_uca_ci = {
8190 171,
8191 0,
8192 0, /* number */
8193 MY_CS_UTF32_UCA_FLAGS, /* state */
8194 "utf32", /* csname */
8195 "utf32_danish_ci", /* m_coll_name */
8196 "", /* comment */
8197 danish, /* tailoring */
8198 nullptr, /* coll_param */
8199 nullptr, /* ctype */
8200 nullptr, /* to_lower */
8201 nullptr, /* to_upper */
8202 nullptr, /* sort_order */
8203 nullptr, /* uca */
8204 nullptr, /* tab_to_uni */
8205 nullptr, /* tab_from_uni */
8206 &my_unicase_default, /* caseinfo */
8207 nullptr, /* state_map */
8208 nullptr, /* ident_map */
8209 8, /* strxfrm_multiply */
8210 1, /* caseup_multiply */
8211 1, /* casedn_multiply */
8212 4, /* mbminlen */
8213 4, /* mbmaxlen */
8214 1, /* mbmaxlenlen */
8215 9, /* min_sort_char */
8216 0xFFFF, /* max_sort_char */
8217 ' ', /* pad char */
8218 false, /* escape_with_backslash_is_dangerous */
8219 1, /* levels_for_compare */
8220 &my_charset_utf32_handler, /* charset handler */
8221 &my_collation_utf32_uca_handler, /* collation handler */
8222 PAD_SPACE};
8223
/*
  CHARSET_INFO entry for the "utf32_lithuanian_ci" collation of character set
  "utf32": `lithuanian` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 172 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8224 CHARSET_INFO my_charset_utf32_lithuanian_uca_ci = {
8225 172,
8226 0,
8227 0, /* number */
8228 MY_CS_UTF32_UCA_FLAGS, /* state */
8229 "utf32", /* csname */
8230 "utf32_lithuanian_ci", /* m_coll_name */
8231 "", /* comment */
8232 lithuanian, /* tailoring */
8233 nullptr, /* coll_param */
8234 nullptr, /* ctype */
8235 nullptr, /* to_lower */
8236 nullptr, /* to_upper */
8237 nullptr, /* sort_order */
8238 nullptr, /* uca */
8239 nullptr, /* tab_to_uni */
8240 nullptr, /* tab_from_uni */
8241 &my_unicase_default, /* caseinfo */
8242 nullptr, /* state_map */
8243 nullptr, /* ident_map */
8244 8, /* strxfrm_multiply */
8245 1, /* caseup_multiply */
8246 1, /* casedn_multiply */
8247 4, /* mbminlen */
8248 4, /* mbmaxlen */
8249 1, /* mbmaxlenlen */
8250 9, /* min_sort_char */
8251 0xFFFF, /* max_sort_char */
8252 ' ', /* pad char */
8253 false, /* escape_with_backslash_is_dangerous */
8254 1, /* levels_for_compare */
8255 &my_charset_utf32_handler, /* charset handler */
8256 &my_collation_utf32_uca_handler, /* collation handler */
8257 PAD_SPACE};
8258
/*
  CHARSET_INFO entry for the "utf32_slovak_ci" collation of character set
  "utf32": `slovak` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 173 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8259 CHARSET_INFO my_charset_utf32_slovak_uca_ci = {
8260 173,
8261 0,
8262 0, /* number */
8263 MY_CS_UTF32_UCA_FLAGS, /* state */
8264 "utf32", /* csname */
8265 "utf32_slovak_ci", /* m_coll_name */
8266 "", /* comment */
8267 slovak, /* tailoring */
8268 nullptr, /* coll_param */
8269 nullptr, /* ctype */
8270 nullptr, /* to_lower */
8271 nullptr, /* to_upper */
8272 nullptr, /* sort_order */
8273 nullptr, /* uca */
8274 nullptr, /* tab_to_uni */
8275 nullptr, /* tab_from_uni */
8276 &my_unicase_default, /* caseinfo */
8277 nullptr, /* state_map */
8278 nullptr, /* ident_map */
8279 8, /* strxfrm_multiply */
8280 1, /* caseup_multiply */
8281 1, /* casedn_multiply */
8282 4, /* mbminlen */
8283 4, /* mbmaxlen */
8284 1, /* mbmaxlenlen */
8285 9, /* min_sort_char */
8286 0xFFFF, /* max_sort_char */
8287 ' ', /* pad char */
8288 false, /* escape_with_backslash_is_dangerous */
8289 1, /* levels_for_compare */
8290 &my_charset_utf32_handler, /* charset handler */
8291 &my_collation_utf32_uca_handler, /* collation handler */
8292 PAD_SPACE};
8293
/*
  CHARSET_INFO entry for the "utf32_spanish2_ci" collation of character set
  "utf32": `spanish2` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 174 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8294 CHARSET_INFO my_charset_utf32_spanish2_uca_ci = {
8295 174,
8296 0,
8297 0, /* number */
8298 MY_CS_UTF32_UCA_FLAGS, /* state */
8299 "utf32", /* csname */
8300 "utf32_spanish2_ci", /* m_coll_name */
8301 "", /* comment */
8302 spanish2, /* tailoring */
8303 nullptr, /* coll_param */
8304 nullptr, /* ctype */
8305 nullptr, /* to_lower */
8306 nullptr, /* to_upper */
8307 nullptr, /* sort_order */
8308 nullptr, /* uca */
8309 nullptr, /* tab_to_uni */
8310 nullptr, /* tab_from_uni */
8311 &my_unicase_default, /* caseinfo */
8312 nullptr, /* state_map */
8313 nullptr, /* ident_map */
8314 8, /* strxfrm_multiply */
8315 1, /* caseup_multiply */
8316 1, /* casedn_multiply */
8317 4, /* mbminlen */
8318 4, /* mbmaxlen */
8319 1, /* mbmaxlenlen */
8320 9, /* min_sort_char */
8321 0xFFFF, /* max_sort_char */
8322 ' ', /* pad char */
8323 false, /* escape_with_backslash_is_dangerous */
8324 1, /* levels_for_compare */
8325 &my_charset_utf32_handler, /* charset handler */
8326 &my_collation_utf32_uca_handler, /* collation handler */
8327 PAD_SPACE};
8328
/*
  CHARSET_INFO entry for the "utf32_roman_ci" collation of character set
  "utf32": `roman` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 175 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8329 CHARSET_INFO my_charset_utf32_roman_uca_ci = {
8330 175,
8331 0,
8332 0, /* number */
8333 MY_CS_UTF32_UCA_FLAGS, /* state */
8334 "utf32", /* csname */
8335 "utf32_roman_ci", /* m_coll_name */
8336 "", /* comment */
8337 roman, /* tailoring */
8338 nullptr, /* coll_param */
8339 nullptr, /* ctype */
8340 nullptr, /* to_lower */
8341 nullptr, /* to_upper */
8342 nullptr, /* sort_order */
8343 nullptr, /* uca */
8344 nullptr, /* tab_to_uni */
8345 nullptr, /* tab_from_uni */
8346 &my_unicase_default, /* caseinfo */
8347 nullptr, /* state_map */
8348 nullptr, /* ident_map */
8349 8, /* strxfrm_multiply */
8350 1, /* caseup_multiply */
8351 1, /* casedn_multiply */
8352 4, /* mbminlen */
8353 4, /* mbmaxlen */
8354 1, /* mbmaxlenlen */
8355 9, /* min_sort_char */
8356 0xFFFF, /* max_sort_char */
8357 ' ', /* pad char */
8358 false, /* escape_with_backslash_is_dangerous */
8359 1, /* levels_for_compare */
8360 &my_charset_utf32_handler, /* charset handler */
8361 &my_collation_utf32_uca_handler, /* collation handler */
8362 PAD_SPACE};
8363
/*
  CHARSET_INFO entry for the "utf32_persian_ci" collation of character set
  "utf32": `persian` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 176 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8364 CHARSET_INFO my_charset_utf32_persian_uca_ci = {
8365 176,
8366 0,
8367 0, /* number */
8368 MY_CS_UTF32_UCA_FLAGS, /* state */
8369 "utf32", /* csname */
8370 "utf32_persian_ci", /* m_coll_name */
8371 "", /* comment */
8372 persian, /* tailoring */
8373 nullptr, /* coll_param */
8374 nullptr, /* ctype */
8375 nullptr, /* to_lower */
8376 nullptr, /* to_upper */
8377 nullptr, /* sort_order */
8378 nullptr, /* uca */
8379 nullptr, /* tab_to_uni */
8380 nullptr, /* tab_from_uni */
8381 &my_unicase_default, /* caseinfo */
8382 nullptr, /* state_map */
8383 nullptr, /* ident_map */
8384 8, /* strxfrm_multiply */
8385 1, /* caseup_multiply */
8386 1, /* casedn_multiply */
8387 4, /* mbminlen */
8388 4, /* mbmaxlen */
8389 1, /* mbmaxlenlen */
8390 9, /* min_sort_char */
8391 0xFFFF, /* max_sort_char */
8392 ' ', /* pad char */
8393 false, /* escape_with_backslash_is_dangerous */
8394 1, /* levels_for_compare */
8395 &my_charset_utf32_handler, /* charset handler */
8396 &my_collation_utf32_uca_handler, /* collation handler */
8397 PAD_SPACE};
8398
/*
  CHARSET_INFO entry for the "utf32_esperanto_ci" collation of character set
  "utf32": `esperanto` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 177 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8399 CHARSET_INFO my_charset_utf32_esperanto_uca_ci = {
8400 177,
8401 0,
8402 0, /* number */
8403 MY_CS_UTF32_UCA_FLAGS, /* state */
8404 "utf32", /* csname */
8405 "utf32_esperanto_ci", /* m_coll_name */
8406 "", /* comment */
8407 esperanto, /* tailoring */
8408 nullptr, /* coll_param */
8409 nullptr, /* ctype */
8410 nullptr, /* to_lower */
8411 nullptr, /* to_upper */
8412 nullptr, /* sort_order */
8413 nullptr, /* uca */
8414 nullptr, /* tab_to_uni */
8415 nullptr, /* tab_from_uni */
8416 &my_unicase_default, /* caseinfo */
8417 nullptr, /* state_map */
8418 nullptr, /* ident_map */
8419 8, /* strxfrm_multiply */
8420 1, /* caseup_multiply */
8421 1, /* casedn_multiply */
8422 4, /* mbminlen */
8423 4, /* mbmaxlen */
8424 1, /* mbmaxlenlen */
8425 9, /* min_sort_char */
8426 0xFFFF, /* max_sort_char */
8427 ' ', /* pad char */
8428 false, /* escape_with_backslash_is_dangerous */
8429 1, /* levels_for_compare */
8430 &my_charset_utf32_handler, /* charset handler */
8431 &my_collation_utf32_uca_handler, /* collation handler */
8432 PAD_SPACE};
8433
/*
  CHARSET_INFO entry for the "utf32_hungarian_ci" collation of character set
  "utf32": `hungarian` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 178 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8434 CHARSET_INFO my_charset_utf32_hungarian_uca_ci = {
8435 178,
8436 0,
8437 0, /* number */
8438 MY_CS_UTF32_UCA_FLAGS, /* state */
8439 "utf32", /* csname */
8440 "utf32_hungarian_ci", /* m_coll_name */
8441 "", /* comment */
8442 hungarian, /* tailoring */
8443 nullptr, /* coll_param */
8444 nullptr, /* ctype */
8445 nullptr, /* to_lower */
8446 nullptr, /* to_upper */
8447 nullptr, /* sort_order */
8448 nullptr, /* uca */
8449 nullptr, /* tab_to_uni */
8450 nullptr, /* tab_from_uni */
8451 &my_unicase_default, /* caseinfo */
8452 nullptr, /* state_map */
8453 nullptr, /* ident_map */
8454 8, /* strxfrm_multiply */
8455 1, /* caseup_multiply */
8456 1, /* casedn_multiply */
8457 4, /* mbminlen */
8458 4, /* mbmaxlen */
8459 1, /* mbmaxlenlen */
8460 9, /* min_sort_char */
8461 0xFFFF, /* max_sort_char */
8462 ' ', /* pad char */
8463 false, /* escape_with_backslash_is_dangerous */
8464 1, /* levels_for_compare */
8465 &my_charset_utf32_handler, /* charset handler */
8466 &my_collation_utf32_uca_handler, /* collation handler */
8467 PAD_SPACE};
8468
/*
  CHARSET_INFO entry for the "utf32_sinhala_ci" collation of character set
  "utf32": `sinhala` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 179 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8469 CHARSET_INFO my_charset_utf32_sinhala_uca_ci = {
8470 179,
8471 0,
8472 0, /* number */
8473 MY_CS_UTF32_UCA_FLAGS, /* state */
8474 "utf32", /* csname */
8475 "utf32_sinhala_ci", /* m_coll_name */
8476 "", /* comment */
8477 sinhala, /* tailoring */
8478 nullptr, /* coll_param */
8479 nullptr, /* ctype */
8480 nullptr, /* to_lower */
8481 nullptr, /* to_upper */
8482 nullptr, /* sort_order */
8483 nullptr, /* uca */
8484 nullptr, /* tab_to_uni */
8485 nullptr, /* tab_from_uni */
8486 &my_unicase_default, /* caseinfo */
8487 nullptr, /* state_map */
8488 nullptr, /* ident_map */
8489 8, /* strxfrm_multiply */
8490 1, /* caseup_multiply */
8491 1, /* casedn_multiply */
8492 4, /* mbminlen */
8493 4, /* mbmaxlen */
8494 1, /* mbmaxlenlen */
8495 9, /* min_sort_char */
8496 0xFFFF, /* max_sort_char */
8497 ' ', /* pad char */
8498 false, /* escape_with_backslash_is_dangerous */
8499 1, /* levels_for_compare */
8500 &my_charset_utf32_handler, /* charset handler */
8501 &my_collation_utf32_uca_handler, /* collation handler */
8502 PAD_SPACE};
8503
/*
  CHARSET_INFO entry for the "utf32_german2_ci" collation of character set
  "utf32": `german2` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 180 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8504 CHARSET_INFO my_charset_utf32_german2_uca_ci = {
8505 180,
8506 0,
8507 0, /* number */
8508 MY_CS_UTF32_UCA_FLAGS, /* state */
8509 "utf32", /* csname */
8510 "utf32_german2_ci", /* m_coll_name */
8511 "", /* comment */
8512 german2, /* tailoring */
8513 nullptr, /* coll_param */
8514 nullptr, /* ctype */
8515 nullptr, /* to_lower */
8516 nullptr, /* to_upper */
8517 nullptr, /* sort_order */
8518 nullptr, /* uca */
8519 nullptr, /* tab_to_uni */
8520 nullptr, /* tab_from_uni */
8521 &my_unicase_default, /* caseinfo */
8522 nullptr, /* state_map */
8523 nullptr, /* ident_map */
8524 8, /* strxfrm_multiply */
8525 1, /* caseup_multiply */
8526 1, /* casedn_multiply */
8527 4, /* mbminlen */
8528 4, /* mbmaxlen */
8529 1, /* mbmaxlenlen */
8530 9, /* min_sort_char */
8531 0xFFFF, /* max_sort_char */
8532 ' ', /* pad char */
8533 false, /* escape_with_backslash_is_dangerous */
8534 1, /* levels_for_compare */
8535 &my_charset_utf32_handler, /* charset handler */
8536 &my_collation_utf32_uca_handler, /* collation handler */
8537 PAD_SPACE};
8538
/*
  CHARSET_INFO entry for the "utf32_croatian_ci" collation of character set
  "utf32": `croatian` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 181 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8539 CHARSET_INFO my_charset_utf32_croatian_uca_ci = {
8540 181,
8541 0,
8542 0, /* number */
8543 MY_CS_UTF32_UCA_FLAGS, /* state */
8544 "utf32", /* csname */
8545 "utf32_croatian_ci", /* m_coll_name */
8546 "", /* comment */
8547 croatian, /* tailoring */
8548 nullptr, /* coll_param */
8549 nullptr, /* ctype */
8550 nullptr, /* to_lower */
8551 nullptr, /* to_upper */
8552 nullptr, /* sort_order */
8553 nullptr, /* uca */
8554 nullptr, /* tab_to_uni */
8555 nullptr, /* tab_from_uni */
8556 &my_unicase_default, /* caseinfo */
8557 nullptr, /* state_map */
8558 nullptr, /* ident_map */
8559 8, /* strxfrm_multiply */
8560 1, /* caseup_multiply */
8561 1, /* casedn_multiply */
8562 4, /* mbminlen */
8563 4, /* mbmaxlen */
8564 1, /* mbmaxlenlen */
8565 9, /* min_sort_char */
8566 0xFFFF, /* max_sort_char */
8567 ' ', /* pad char */
8568 false, /* escape_with_backslash_is_dangerous */
8569 1, /* levels_for_compare */
8570 &my_charset_utf32_handler, /* charset handler */
8571 &my_collation_utf32_uca_handler, /* collation handler */
8572 PAD_SPACE};
8573
/*
  CHARSET_INFO entry for the "utf32_unicode_520_ci" collation of character
  set "utf32". Unlike the language-specific utf32 entries around it, this one
  has an empty tailoring string, points its uca field at my_uca_v520, uses
  my_unicase_unicode520 case info and sets max_sort_char to 0x10FFFF instead
  of 0xFFFF.
  NOTE(review): the leading 182 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8574 CHARSET_INFO my_charset_utf32_unicode_520_ci = {
8575 182,
8576 0,
8577 0, /* number */
8578 MY_CS_UTF32_UCA_FLAGS, /* state */
8579 "utf32", /* csname */
8580 "utf32_unicode_520_ci", /* m_coll_name */
8581 "", /* comment */
8582 "", /* tailoring */
8583 nullptr, /* coll_param */
8584 nullptr, /* ctype */
8585 nullptr, /* to_lower */
8586 nullptr, /* to_upper */
8587 nullptr, /* sort_order */
8588 &my_uca_v520, /* uca */
8589 nullptr, /* tab_to_uni */
8590 nullptr, /* tab_from_uni */
8591 &my_unicase_unicode520, /* caseinfo */
8592 nullptr, /* state_map */
8593 nullptr, /* ident_map */
8594 8, /* strxfrm_multiply */
8595 1, /* caseup_multiply */
8596 1, /* casedn_multiply */
8597 4, /* mbminlen */
8598 4, /* mbmaxlen */
8599 1, /* mbmaxlenlen */
8600 9, /* min_sort_char */
8601 0x10FFFF, /* max_sort_char */
8602 ' ', /* pad char */
8603 false, /* escape_with_backslash_is_dangerous */
8604 1, /* levels_for_compare */
8605 &my_charset_utf32_handler, /* charset handler */
8606 &my_collation_utf32_uca_handler, /* collation handler */
8607 PAD_SPACE};
8608
/*
  CHARSET_INFO entry for the "utf32_vietnamese_ci" collation of character set
  "utf32": `vietnamese` tailoring, my_unicase_default case info, mbminlen and
  mbmaxlen both 4.
  NOTE(review): the leading 183 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8609 CHARSET_INFO my_charset_utf32_vietnamese_ci = {
8610 183,
8611 0,
8612 0, /* number */
8613 MY_CS_UTF32_UCA_FLAGS, /* state */
8614 "utf32", /* csname */
8615 "utf32_vietnamese_ci", /* m_coll_name */
8616 "", /* comment */
8617 vietnamese, /* tailoring */
8618 nullptr, /* coll_param */
8619 nullptr, /* ctype */
8620 nullptr, /* to_lower */
8621 nullptr, /* to_upper */
8622 nullptr, /* sort_order */
8623 nullptr, /* uca */
8624 nullptr, /* tab_to_uni */
8625 nullptr, /* tab_from_uni */
8626 &my_unicase_default, /* caseinfo */
8627 nullptr, /* state_map */
8628 nullptr, /* ident_map */
8629 8, /* strxfrm_multiply */
8630 1, /* caseup_multiply */
8631 1, /* casedn_multiply */
8632 4, /* mbminlen */
8633 4, /* mbmaxlen */
8634 1, /* mbmaxlenlen */
8635 9, /* min_sort_char */
8636 0xFFFF, /* max_sort_char */
8637 ' ', /* pad char */
8638 false, /* escape_with_backslash_is_dangerous */
8639 1, /* levels_for_compare */
8640 &my_charset_utf32_handler, /* charset handler */
8641 &my_collation_utf32_uca_handler, /* collation handler */
8642 PAD_SPACE};
8643
/*
  Collation handler table referenced by the utf16 UCA CHARSET_INFO entries
  that follow. Only the first slot is labelled in the original source.
  NOTE(review): the other labels below are inferred from the function names
  alone -- confirm each against the MY_COLLATION_HANDLER declaration.
*/
8644 MY_COLLATION_HANDLER my_collation_utf16_uca_handler = {
8645 my_coll_init_uca, /* init */
8646 my_coll_uninit_uca, /* presumably uninit */
8647 my_strnncoll_any_uca, /* presumably strnncoll */
8648 my_strnncollsp_any_uca, /* presumably strnncollsp */
8649 my_strnxfrm_any_uca, /* presumably strnxfrm */
8650 my_strnxfrmlen_simple, /* presumably strnxfrmlen */
8651 my_like_range_generic, /* presumably like_range */
8652 my_wildcmp_uca, /* presumably wildcmp */
8653 nullptr, /* slot left unset */
8654 my_instr_mb, /* presumably instr */
8655 my_hash_sort_any_uca, /* presumably hash_sort */
8656 my_propagate_complex}; /* presumably propagate */
8657
/* Declaration of the utf16 charset handler that the utf16 entries below reference. */
8658 extern MY_CHARSET_HANDLER my_charset_utf16_handler;
8659
/* State flag set used as the "state" field of every utf16 UCA entry below. */
8660 #define MY_CS_UTF16_UCA_FLAGS \
8661 (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII)
8662
/*
  CHARSET_INFO entry for the "utf16_unicode_ci" collation of character set
  "utf16": empty tailoring string, my_unicase_default case info, mbminlen 2
  and mbmaxlen 4.
  NOTE(review): the leading 101 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8663 CHARSET_INFO my_charset_utf16_unicode_ci = {
8664 101,
8665 0,
8666 0, /* number */
8667 MY_CS_UTF16_UCA_FLAGS, /* state */
8668 "utf16", /* csname */
8669 "utf16_unicode_ci", /* m_coll_name */
8670 "", /* comment */
8671 "", /* tailoring */
8672 nullptr, /* coll_param */
8673 nullptr, /* ctype */
8674 nullptr, /* to_lower */
8675 nullptr, /* to_upper */
8676 nullptr, /* sort_order */
8677 nullptr, /* uca */
8678 nullptr, /* tab_to_uni */
8679 nullptr, /* tab_from_uni */
8680 &my_unicase_default, /* caseinfo */
8681 nullptr, /* state_map */
8682 nullptr, /* ident_map */
8683 8, /* strxfrm_multiply */
8684 1, /* caseup_multiply */
8685 1, /* casedn_multiply */
8686 2, /* mbminlen */
8687 4, /* mbmaxlen */
8688 1, /* mbmaxlenlen */
8689 9, /* min_sort_char */
8690 0xFFFF, /* max_sort_char */
8691 ' ', /* pad char */
8692 false, /* escape_with_backslash_is_dangerous */
8693 1, /* levels_for_compare */
8694 &my_charset_utf16_handler, /* charset handler */
8695 &my_collation_utf16_uca_handler, /* collation handler */
8696 PAD_SPACE};
8697
/*
  CHARSET_INFO entry for the "utf16_icelandic_ci" collation of character set
  "utf16": `icelandic` tailoring, my_unicase_default case info, mbminlen 2
  and mbmaxlen 4.
  NOTE(review): the leading 102 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8698 CHARSET_INFO my_charset_utf16_icelandic_uca_ci = {
8699 102,
8700 0,
8701 0, /* number */
8702 MY_CS_UTF16_UCA_FLAGS, /* state */
8703 "utf16", /* csname */
8704 "utf16_icelandic_ci", /* m_coll_name */
8705 "", /* comment */
8706 icelandic, /* tailoring */
8707 nullptr, /* coll_param */
8708 nullptr, /* ctype */
8709 nullptr, /* to_lower */
8710 nullptr, /* to_upper */
8711 nullptr, /* sort_order */
8712 nullptr, /* uca */
8713 nullptr, /* tab_to_uni */
8714 nullptr, /* tab_from_uni */
8715 &my_unicase_default, /* caseinfo */
8716 nullptr, /* state_map */
8717 nullptr, /* ident_map */
8718 8, /* strxfrm_multiply */
8719 1, /* caseup_multiply */
8720 1, /* casedn_multiply */
8721 2, /* mbminlen */
8722 4, /* mbmaxlen */
8723 1, /* mbmaxlenlen */
8724 9, /* min_sort_char */
8725 0xFFFF, /* max_sort_char */
8726 ' ', /* pad char */
8727 false, /* escape_with_backslash_is_dangerous */
8728 1, /* levels_for_compare */
8729 &my_charset_utf16_handler, /* charset handler */
8730 &my_collation_utf16_uca_handler, /* collation handler */
8731 PAD_SPACE};
8732
/*
  CHARSET_INFO entry for the "utf16_latvian_ci" collation of character set
  "utf16": `latvian` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 103 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8733 CHARSET_INFO my_charset_utf16_latvian_uca_ci = {
8734 103,
8735 0,
8736 0, /* number */
8737 MY_CS_UTF16_UCA_FLAGS, /* state */
8738 "utf16", /* csname */
8739 "utf16_latvian_ci", /* m_coll_name */
8740 "", /* comment */
8741 latvian, /* tailoring */
8742 nullptr, /* coll_param */
8743 nullptr, /* ctype */
8744 nullptr, /* to_lower */
8745 nullptr, /* to_upper */
8746 nullptr, /* sort_order */
8747 nullptr, /* uca */
8748 nullptr, /* tab_to_uni */
8749 nullptr, /* tab_from_uni */
8750 &my_unicase_default, /* caseinfo */
8751 nullptr, /* state_map */
8752 nullptr, /* ident_map */
8753 8, /* strxfrm_multiply */
8754 1, /* caseup_multiply */
8755 1, /* casedn_multiply */
8756 2, /* mbminlen */
8757 4, /* mbmaxlen */
8758 1, /* mbmaxlenlen */
8759 9, /* min_sort_char */
8760 0xFFFF, /* max_sort_char */
8761 ' ', /* pad char */
8762 false, /* escape_with_backslash_is_dangerous */
8763 1, /* levels_for_compare */
8764 &my_charset_utf16_handler, /* charset handler */
8765 &my_collation_utf16_uca_handler, /* collation handler */
8766 PAD_SPACE};
8767
/*
  CHARSET_INFO entry for the "utf16_romanian_ci" collation of character set
  "utf16": `romanian` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 104 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8768 CHARSET_INFO my_charset_utf16_romanian_uca_ci = {
8769 104,
8770 0,
8771 0, /* number */
8772 MY_CS_UTF16_UCA_FLAGS, /* state */
8773 "utf16", /* csname */
8774 "utf16_romanian_ci", /* m_coll_name */
8775 "", /* comment */
8776 romanian, /* tailoring */
8777 nullptr, /* coll_param */
8778 nullptr, /* ctype */
8779 nullptr, /* to_lower */
8780 nullptr, /* to_upper */
8781 nullptr, /* sort_order */
8782 nullptr, /* uca */
8783 nullptr, /* tab_to_uni */
8784 nullptr, /* tab_from_uni */
8785 &my_unicase_default, /* caseinfo */
8786 nullptr, /* state_map */
8787 nullptr, /* ident_map */
8788 8, /* strxfrm_multiply */
8789 1, /* caseup_multiply */
8790 1, /* casedn_multiply */
8791 2, /* mbminlen */
8792 4, /* mbmaxlen */
8793 1, /* mbmaxlenlen */
8794 9, /* min_sort_char */
8795 0xFFFF, /* max_sort_char */
8796 ' ', /* pad char */
8797 false, /* escape_with_backslash_is_dangerous */
8798 1, /* levels_for_compare */
8799 &my_charset_utf16_handler, /* charset handler */
8800 &my_collation_utf16_uca_handler, /* collation handler */
8801 PAD_SPACE};
8802
/*
  CHARSET_INFO entry for the "utf16_slovenian_ci" collation of character set
  "utf16": `slovenian` tailoring, my_unicase_default case info, mbminlen 2
  and mbmaxlen 4.
  NOTE(review): the leading 105 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8803 CHARSET_INFO my_charset_utf16_slovenian_uca_ci = {
8804 105,
8805 0,
8806 0, /* number */
8807 MY_CS_UTF16_UCA_FLAGS, /* state */
8808 "utf16", /* csname */
8809 "utf16_slovenian_ci", /* m_coll_name */
8810 "", /* comment */
8811 slovenian, /* tailoring */
8812 nullptr, /* coll_param */
8813 nullptr, /* ctype */
8814 nullptr, /* to_lower */
8815 nullptr, /* to_upper */
8816 nullptr, /* sort_order */
8817 nullptr, /* uca */
8818 nullptr, /* tab_to_uni */
8819 nullptr, /* tab_from_uni */
8820 &my_unicase_default, /* caseinfo */
8821 nullptr, /* state_map */
8822 nullptr, /* ident_map */
8823 8, /* strxfrm_multiply */
8824 1, /* caseup_multiply */
8825 1, /* casedn_multiply */
8826 2, /* mbminlen */
8827 4, /* mbmaxlen */
8828 1, /* mbmaxlenlen */
8829 9, /* min_sort_char */
8830 0xFFFF, /* max_sort_char */
8831 ' ', /* pad char */
8832 false, /* escape_with_backslash_is_dangerous */
8833 1, /* levels_for_compare */
8834 &my_charset_utf16_handler, /* charset handler */
8835 &my_collation_utf16_uca_handler, /* collation handler */
8836 PAD_SPACE};
8837
/*
  CHARSET_INFO entry for the "utf16_polish_ci" collation of character set
  "utf16": `polish` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 106 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8838 CHARSET_INFO my_charset_utf16_polish_uca_ci = {
8839 106,
8840 0,
8841 0, /* number */
8842 MY_CS_UTF16_UCA_FLAGS, /* state */
8843 "utf16", /* csname */
8844 "utf16_polish_ci", /* m_coll_name */
8845 "", /* comment */
8846 polish, /* tailoring */
8847 nullptr, /* coll_param */
8848 nullptr, /* ctype */
8849 nullptr, /* to_lower */
8850 nullptr, /* to_upper */
8851 nullptr, /* sort_order */
8852 nullptr, /* uca */
8853 nullptr, /* tab_to_uni */
8854 nullptr, /* tab_from_uni */
8855 &my_unicase_default, /* caseinfo */
8856 nullptr, /* state_map */
8857 nullptr, /* ident_map */
8858 8, /* strxfrm_multiply */
8859 1, /* caseup_multiply */
8860 1, /* casedn_multiply */
8861 2, /* mbminlen */
8862 4, /* mbmaxlen */
8863 1, /* mbmaxlenlen */
8864 9, /* min_sort_char */
8865 0xFFFF, /* max_sort_char */
8866 ' ', /* pad char */
8867 false, /* escape_with_backslash_is_dangerous */
8868 1, /* levels_for_compare */
8869 &my_charset_utf16_handler, /* charset handler */
8870 &my_collation_utf16_uca_handler, /* collation handler */
8871 PAD_SPACE};
8872
/*
  CHARSET_INFO entry for the "utf16_estonian_ci" collation of character set
  "utf16": `estonian` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 107 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8873 CHARSET_INFO my_charset_utf16_estonian_uca_ci = {
8874 107,
8875 0,
8876 0, /* number */
8877 MY_CS_UTF16_UCA_FLAGS, /* state */
8878 "utf16", /* csname */
8879 "utf16_estonian_ci", /* m_coll_name */
8880 "", /* comment */
8881 estonian, /* tailoring */
8882 nullptr, /* coll_param */
8883 nullptr, /* ctype */
8884 nullptr, /* to_lower */
8885 nullptr, /* to_upper */
8886 nullptr, /* sort_order */
8887 nullptr, /* uca */
8888 nullptr, /* tab_to_uni */
8889 nullptr, /* tab_from_uni */
8890 &my_unicase_default, /* caseinfo */
8891 nullptr, /* state_map */
8892 nullptr, /* ident_map */
8893 8, /* strxfrm_multiply */
8894 1, /* caseup_multiply */
8895 1, /* casedn_multiply */
8896 2, /* mbminlen */
8897 4, /* mbmaxlen */
8898 1, /* mbmaxlenlen */
8899 9, /* min_sort_char */
8900 0xFFFF, /* max_sort_char */
8901 ' ', /* pad char */
8902 false, /* escape_with_backslash_is_dangerous */
8903 1, /* levels_for_compare */
8904 &my_charset_utf16_handler, /* charset handler */
8905 &my_collation_utf16_uca_handler, /* collation handler */
8906 PAD_SPACE};
8907
/*
  CHARSET_INFO entry for the "utf16_spanish_ci" collation of character set
  "utf16": `spanish` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 108 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8908 CHARSET_INFO my_charset_utf16_spanish_uca_ci = {
8909 108,
8910 0,
8911 0, /* number */
8912 MY_CS_UTF16_UCA_FLAGS, /* state */
8913 "utf16", /* csname */
8914 "utf16_spanish_ci", /* m_coll_name */
8915 "", /* comment */
8916 spanish, /* tailoring */
8917 nullptr, /* coll_param */
8918 nullptr, /* ctype */
8919 nullptr, /* to_lower */
8920 nullptr, /* to_upper */
8921 nullptr, /* sort_order */
8922 nullptr, /* uca */
8923 nullptr, /* tab_to_uni */
8924 nullptr, /* tab_from_uni */
8925 &my_unicase_default, /* caseinfo */
8926 nullptr, /* state_map */
8927 nullptr, /* ident_map */
8928 8, /* strxfrm_multiply */
8929 1, /* caseup_multiply */
8930 1, /* casedn_multiply */
8931 2, /* mbminlen */
8932 4, /* mbmaxlen */
8933 1, /* mbmaxlenlen */
8934 9, /* min_sort_char */
8935 0xFFFF, /* max_sort_char */
8936 ' ', /* pad char */
8937 false, /* escape_with_backslash_is_dangerous */
8938 1, /* levels_for_compare */
8939 &my_charset_utf16_handler, /* charset handler */
8940 &my_collation_utf16_uca_handler, /* collation handler */
8941 PAD_SPACE};
8942
/*
  CHARSET_INFO entry for the "utf16_swedish_ci" collation of character set
  "utf16": `swedish` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 109 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8943 CHARSET_INFO my_charset_utf16_swedish_uca_ci = {
8944 109,
8945 0,
8946 0, /* number */
8947 MY_CS_UTF16_UCA_FLAGS, /* state */
8948 "utf16", /* csname */
8949 "utf16_swedish_ci", /* m_coll_name */
8950 "", /* comment */
8951 swedish, /* tailoring */
8952 nullptr, /* coll_param */
8953 nullptr, /* ctype */
8954 nullptr, /* to_lower */
8955 nullptr, /* to_upper */
8956 nullptr, /* sort_order */
8957 nullptr, /* uca */
8958 nullptr, /* tab_to_uni */
8959 nullptr, /* tab_from_uni */
8960 &my_unicase_default, /* caseinfo */
8961 nullptr, /* state_map */
8962 nullptr, /* ident_map */
8963 8, /* strxfrm_multiply */
8964 1, /* caseup_multiply */
8965 1, /* casedn_multiply */
8966 2, /* mbminlen */
8967 4, /* mbmaxlen */
8968 1, /* mbmaxlenlen */
8969 9, /* min_sort_char */
8970 0xFFFF, /* max_sort_char */
8971 ' ', /* pad char */
8972 false, /* escape_with_backslash_is_dangerous */
8973 1, /* levels_for_compare */
8974 &my_charset_utf16_handler, /* charset handler */
8975 &my_collation_utf16_uca_handler, /* collation handler */
8976 PAD_SPACE};
8977
/*
  CHARSET_INFO entry for the "utf16_turkish_ci" collation of character set
  "utf16": `turkish` tailoring, my_unicase_turkish case info (not the default
  table), mbminlen 2 and mbmaxlen 4.
  NOTE(review): the leading 110 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
8978 CHARSET_INFO my_charset_utf16_turkish_uca_ci = {
8979 110,
8980 0,
8981 0, /* number */
8982 MY_CS_UTF16_UCA_FLAGS, /* state */
8983 "utf16", /* csname */
8984 "utf16_turkish_ci", /* m_coll_name */
8985 "", /* comment */
8986 turkish, /* tailoring */
8987 nullptr, /* coll_param */
8988 nullptr, /* ctype */
8989 nullptr, /* to_lower */
8990 nullptr, /* to_upper */
8991 nullptr, /* sort_order */
8992 nullptr, /* uca */
8993 nullptr, /* tab_to_uni */
8994 nullptr, /* tab_from_uni */
8995 &my_unicase_turkish, /* caseinfo */
8996 nullptr, /* state_map */
8997 nullptr, /* ident_map */
8998 8, /* strxfrm_multiply */
8999 1, /* caseup_multiply */
9000 1, /* casedn_multiply */
9001 2, /* mbminlen */
9002 4, /* mbmaxlen */
9003 1, /* mbmaxlenlen */
9004 9, /* min_sort_char */
9005 0xFFFF, /* max_sort_char */
9006 ' ', /* pad char */
9007 false, /* escape_with_backslash_is_dangerous */
9008 1, /* levels_for_compare */
9009 &my_charset_utf16_handler, /* charset handler */
9010 &my_collation_utf16_uca_handler, /* collation handler */
9011 PAD_SPACE};
9012
/*
  CHARSET_INFO entry for the "utf16_czech_ci" collation of character set
  "utf16": `czech` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 111 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9013 CHARSET_INFO my_charset_utf16_czech_uca_ci = {
9014 111,
9015 0,
9016 0, /* number */
9017 MY_CS_UTF16_UCA_FLAGS, /* state */
9018 "utf16", /* csname */
9019 "utf16_czech_ci", /* m_coll_name */
9020 "", /* comment */
9021 czech, /* tailoring */
9022 nullptr, /* coll_param */
9023 nullptr, /* ctype */
9024 nullptr, /* to_lower */
9025 nullptr, /* to_upper */
9026 nullptr, /* sort_order */
9027 nullptr, /* uca */
9028 nullptr, /* tab_to_uni */
9029 nullptr, /* tab_from_uni */
9030 &my_unicase_default, /* caseinfo */
9031 nullptr, /* state_map */
9032 nullptr, /* ident_map */
9033 8, /* strxfrm_multiply */
9034 1, /* caseup_multiply */
9035 1, /* casedn_multiply */
9036 2, /* mbminlen */
9037 4, /* mbmaxlen */
9038 1, /* mbmaxlenlen */
9039 9, /* min_sort_char */
9040 0xFFFF, /* max_sort_char */
9041 ' ', /* pad char */
9042 false, /* escape_with_backslash_is_dangerous */
9043 1, /* levels_for_compare */
9044 &my_charset_utf16_handler, /* charset handler */
9045 &my_collation_utf16_uca_handler, /* collation handler */
9046 PAD_SPACE};
9047
/*
  CHARSET_INFO entry for the "utf16_danish_ci" collation of character set
  "utf16": `danish` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 112 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9048 CHARSET_INFO my_charset_utf16_danish_uca_ci = {
9049 112,
9050 0,
9051 0, /* number */
9052 MY_CS_UTF16_UCA_FLAGS, /* state */
9053 "utf16", /* csname */
9054 "utf16_danish_ci", /* m_coll_name */
9055 "", /* comment */
9056 danish, /* tailoring */
9057 nullptr, /* coll_param */
9058 nullptr, /* ctype */
9059 nullptr, /* to_lower */
9060 nullptr, /* to_upper */
9061 nullptr, /* sort_order */
9062 nullptr, /* uca */
9063 nullptr, /* tab_to_uni */
9064 nullptr, /* tab_from_uni */
9065 &my_unicase_default, /* caseinfo */
9066 nullptr, /* state_map */
9067 nullptr, /* ident_map */
9068 8, /* strxfrm_multiply */
9069 1, /* caseup_multiply */
9070 1, /* casedn_multiply */
9071 2, /* mbminlen */
9072 4, /* mbmaxlen */
9073 1, /* mbmaxlenlen */
9074 9, /* min_sort_char */
9075 0xFFFF, /* max_sort_char */
9076 ' ', /* pad char */
9077 false, /* escape_with_backslash_is_dangerous */
9078 1, /* levels_for_compare */
9079 &my_charset_utf16_handler, /* charset handler */
9080 &my_collation_utf16_uca_handler, /* collation handler */
9081 PAD_SPACE};
9082
/*
  CHARSET_INFO entry for the "utf16_lithuanian_ci" collation of character set
  "utf16": `lithuanian` tailoring, my_unicase_default case info, mbminlen 2
  and mbmaxlen 4.
  NOTE(review): the leading 113 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9083 CHARSET_INFO my_charset_utf16_lithuanian_uca_ci = {
9084 113,
9085 0,
9086 0, /* number */
9087 MY_CS_UTF16_UCA_FLAGS, /* state */
9088 "utf16", /* csname */
9089 "utf16_lithuanian_ci", /* m_coll_name */
9090 "", /* comment */
9091 lithuanian, /* tailoring */
9092 nullptr, /* coll_param */
9093 nullptr, /* ctype */
9094 nullptr, /* to_lower */
9095 nullptr, /* to_upper */
9096 nullptr, /* sort_order */
9097 nullptr, /* uca */
9098 nullptr, /* tab_to_uni */
9099 nullptr, /* tab_from_uni */
9100 &my_unicase_default, /* caseinfo */
9101 nullptr, /* state_map */
9102 nullptr, /* ident_map */
9103 8, /* strxfrm_multiply */
9104 1, /* caseup_multiply */
9105 1, /* casedn_multiply */
9106 2, /* mbminlen */
9107 4, /* mbmaxlen */
9108 1, /* mbmaxlenlen */
9109 9, /* min_sort_char */
9110 0xFFFF, /* max_sort_char */
9111 ' ', /* pad char */
9112 false, /* escape_with_backslash_is_dangerous */
9113 1, /* levels_for_compare */
9114 &my_charset_utf16_handler, /* charset handler */
9115 &my_collation_utf16_uca_handler, /* collation handler */
9116 PAD_SPACE};
9117
/*
  CHARSET_INFO entry for the "utf16_slovak_ci" collation of character set
  "utf16": `slovak` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 114 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9118 CHARSET_INFO my_charset_utf16_slovak_uca_ci = {
9119 114,
9120 0,
9121 0, /* number */
9122 MY_CS_UTF16_UCA_FLAGS, /* state */
9123 "utf16", /* csname */
9124 "utf16_slovak_ci", /* m_coll_name */
9125 "", /* comment */
9126 slovak, /* tailoring */
9127 nullptr, /* coll_param */
9128 nullptr, /* ctype */
9129 nullptr, /* to_lower */
9130 nullptr, /* to_upper */
9131 nullptr, /* sort_order */
9132 nullptr, /* uca */
9133 nullptr, /* tab_to_uni */
9134 nullptr, /* tab_from_uni */
9135 &my_unicase_default, /* caseinfo */
9136 nullptr, /* state_map */
9137 nullptr, /* ident_map */
9138 8, /* strxfrm_multiply */
9139 1, /* caseup_multiply */
9140 1, /* casedn_multiply */
9141 2, /* mbminlen */
9142 4, /* mbmaxlen */
9143 1, /* mbmaxlenlen */
9144 9, /* min_sort_char */
9145 0xFFFF, /* max_sort_char */
9146 ' ', /* pad char */
9147 false, /* escape_with_backslash_is_dangerous */
9148 1, /* levels_for_compare */
9149 &my_charset_utf16_handler, /* charset handler */
9150 &my_collation_utf16_uca_handler, /* collation handler */
9151 PAD_SPACE};
9152
/*
  CHARSET_INFO entry for the "utf16_spanish2_ci" collation of character set
  "utf16": `spanish2` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 115 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9153 CHARSET_INFO my_charset_utf16_spanish2_uca_ci = {
9154 115,
9155 0,
9156 0, /* number */
9157 MY_CS_UTF16_UCA_FLAGS, /* state */
9158 "utf16", /* csname */
9159 "utf16_spanish2_ci", /* m_coll_name */
9160 "", /* comment */
9161 spanish2, /* tailoring */
9162 nullptr, /* coll_param */
9163 nullptr, /* ctype */
9164 nullptr, /* to_lower */
9165 nullptr, /* to_upper */
9166 nullptr, /* sort_order */
9167 nullptr, /* uca */
9168 nullptr, /* tab_to_uni */
9169 nullptr, /* tab_from_uni */
9170 &my_unicase_default, /* caseinfo */
9171 nullptr, /* state_map */
9172 nullptr, /* ident_map */
9173 8, /* strxfrm_multiply */
9174 1, /* caseup_multiply */
9175 1, /* casedn_multiply */
9176 2, /* mbminlen */
9177 4, /* mbmaxlen */
9178 1, /* mbmaxlenlen */
9179 9, /* min_sort_char */
9180 0xFFFF, /* max_sort_char */
9181 ' ', /* pad char */
9182 false, /* escape_with_backslash_is_dangerous */
9183 1, /* levels_for_compare */
9184 &my_charset_utf16_handler, /* charset handler */
9185 &my_collation_utf16_uca_handler, /* collation handler */
9186 PAD_SPACE};
9187
/*
  CHARSET_INFO entry for the "utf16_roman_ci" collation of character set
  "utf16": `roman` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 116 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9188 CHARSET_INFO my_charset_utf16_roman_uca_ci = {
9189 116,
9190 0,
9191 0, /* number */
9192 MY_CS_UTF16_UCA_FLAGS, /* state */
9193 "utf16", /* csname */
9194 "utf16_roman_ci", /* m_coll_name */
9195 "", /* comment */
9196 roman, /* tailoring */
9197 nullptr, /* coll_param */
9198 nullptr, /* ctype */
9199 nullptr, /* to_lower */
9200 nullptr, /* to_upper */
9201 nullptr, /* sort_order */
9202 nullptr, /* uca */
9203 nullptr, /* tab_to_uni */
9204 nullptr, /* tab_from_uni */
9205 &my_unicase_default, /* caseinfo */
9206 nullptr, /* state_map */
9207 nullptr, /* ident_map */
9208 8, /* strxfrm_multiply */
9209 1, /* caseup_multiply */
9210 1, /* casedn_multiply */
9211 2, /* mbminlen */
9212 4, /* mbmaxlen */
9213 1, /* mbmaxlenlen */
9214 9, /* min_sort_char */
9215 0xFFFF, /* max_sort_char */
9216 ' ', /* pad char */
9217 false, /* escape_with_backslash_is_dangerous */
9218 1, /* levels_for_compare */
9219 &my_charset_utf16_handler, /* charset handler */
9220 &my_collation_utf16_uca_handler, /* collation handler */
9221 PAD_SPACE};
9222
/*
  CHARSET_INFO entry for the "utf16_persian_ci" collation of character set
  "utf16": `persian` tailoring, my_unicase_default case info, mbminlen 2 and
  mbmaxlen 4.
  NOTE(review): the leading 117 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9223 CHARSET_INFO my_charset_utf16_persian_uca_ci = {
9224 117,
9225 0,
9226 0, /* number */
9227 MY_CS_UTF16_UCA_FLAGS, /* state */
9228 "utf16", /* csname */
9229 "utf16_persian_ci", /* m_coll_name */
9230 "", /* comment */
9231 persian, /* tailoring */
9232 nullptr, /* coll_param */
9233 nullptr, /* ctype */
9234 nullptr, /* to_lower */
9235 nullptr, /* to_upper */
9236 nullptr, /* sort_order */
9237 nullptr, /* uca */
9238 nullptr, /* tab_to_uni */
9239 nullptr, /* tab_from_uni */
9240 &my_unicase_default, /* caseinfo */
9241 nullptr, /* state_map */
9242 nullptr, /* ident_map */
9243 8, /* strxfrm_multiply */
9244 1, /* caseup_multiply */
9245 1, /* casedn_multiply */
9246 2, /* mbminlen */
9247 4, /* mbmaxlen */
9248 1, /* mbmaxlenlen */
9249 9, /* min_sort_char */
9250 0xFFFF, /* max_sort_char */
9251 ' ', /* pad char */
9252 false, /* escape_with_backslash_is_dangerous */
9253 1, /* levels_for_compare */
9254 &my_charset_utf16_handler, /* charset handler */
9255 &my_collation_utf16_uca_handler, /* collation handler */
9256 PAD_SPACE};
9257
/*
  CHARSET_INFO entry for the "utf16_esperanto_ci" collation of character set
  "utf16": `esperanto` tailoring, my_unicase_default case info, mbminlen 2
  and mbmaxlen 4.
  NOTE(review): the leading 118 is presumably the collation id that the
  "number" comment below refers to -- confirm against the CHARSET_INFO
  declaration.
*/
9258 CHARSET_INFO my_charset_utf16_esperanto_uca_ci = {
9259 118,
9260 0,
9261 0, /* number */
9262 MY_CS_UTF16_UCA_FLAGS, /* state */
9263 "utf16", /* csname */
9264 "utf16_esperanto_ci", /* m_coll_name */
9265 "", /* comment */
9266 esperanto, /* tailoring */
9267 nullptr, /* coll_param */
9268 nullptr, /* ctype */
9269 nullptr, /* to_lower */
9270 nullptr, /* to_upper */
9271 nullptr, /* sort_order */
9272 nullptr, /* uca */
9273 nullptr, /* tab_to_uni */
9274 nullptr, /* tab_from_uni */
9275 &my_unicase_default, /* caseinfo */
9276 nullptr, /* state_map */
9277 nullptr, /* ident_map */
9278 8, /* strxfrm_multiply */
9279 1, /* caseup_multiply */
9280 1, /* casedn_multiply */
9281 2, /* mbminlen */
9282 4, /* mbmaxlen */
9283 1, /* mbmaxlenlen */
9284 9, /* min_sort_char */
9285 0xFFFF, /* max_sort_char */
9286 ' ', /* pad char */
9287 false, /* escape_with_backslash_is_dangerous */
9288 1, /* levels_for_compare */
9289 &my_charset_utf16_handler, /* charset handler */
9290 &my_collation_utf16_uca_handler, /* collation handler */
9291 PAD_SPACE};
9292
/**
  utf16 collation "utf16_hungarian_ci", number 119.
  Carries the `hungarian` tailoring string; the uca pointer is nullptr in
  this initializer. levels_for_compare is 1 and the pad attribute is
  PAD_SPACE.
*/
9293 CHARSET_INFO my_charset_utf16_hungarian_uca_ci = {
9294 119,
9295 0,
9296 0, /* number */
9297 MY_CS_UTF16_UCA_FLAGS, /* state */
9298 "utf16", /* csname */
9299 "utf16_hungarian_ci", /* m_coll_name */
9300 "", /* comment */
9301 hungarian, /* tailoring */
9302 nullptr, /* coll_param */
9303 nullptr, /* ctype */
9304 nullptr, /* to_lower */
9305 nullptr, /* to_upper */
9306 nullptr, /* sort_order */
9307 nullptr, /* uca */
9308 nullptr, /* tab_to_uni */
9309 nullptr, /* tab_from_uni */
9310 &my_unicase_default, /* caseinfo */
9311 nullptr, /* state_map */
9312 nullptr, /* ident_map */
9313 8, /* strxfrm_multiply */
9314 1, /* caseup_multiply */
9315 1, /* casedn_multiply */
9316 2, /* mbminlen */
9317 4, /* mbmaxlen */
9318 1, /* mbmaxlenlen */
9319 9, /* min_sort_char */
9320 0xFFFF, /* max_sort_char */
9321 ' ', /* pad char */
9322 false, /* escape_with_backslash_is_dangerous */
9323 1, /* levels_for_compare */
9324 &my_charset_utf16_handler,
9325 &my_collation_utf16_uca_handler,
9326 PAD_SPACE};
9327
/**
  utf16 collation "utf16_sinhala_ci", number 120.
  Carries the `sinhala` tailoring string; the uca pointer is nullptr in
  this initializer. levels_for_compare is 1 and the pad attribute is
  PAD_SPACE.
*/
9328 CHARSET_INFO my_charset_utf16_sinhala_uca_ci = {
9329 120,
9330 0,
9331 0, /* number */
9332 MY_CS_UTF16_UCA_FLAGS, /* state */
9333 "utf16", /* csname */
9334 "utf16_sinhala_ci", /* m_coll_name */
9335 "", /* comment */
9336 sinhala, /* tailoring */
9337 nullptr, /* coll_param */
9338 nullptr, /* ctype */
9339 nullptr, /* to_lower */
9340 nullptr, /* to_upper */
9341 nullptr, /* sort_order */
9342 nullptr, /* uca */
9343 nullptr, /* tab_to_uni */
9344 nullptr, /* tab_from_uni */
9345 &my_unicase_default, /* caseinfo */
9346 nullptr, /* state_map */
9347 nullptr, /* ident_map */
9348 8, /* strxfrm_multiply */
9349 1, /* caseup_multiply */
9350 1, /* casedn_multiply */
9351 2, /* mbminlen */
9352 4, /* mbmaxlen */
9353 1, /* mbmaxlenlen */
9354 9, /* min_sort_char */
9355 0xFFFF, /* max_sort_char */
9356 ' ', /* pad char */
9357 false, /* escape_with_backslash_is_dangerous */
9358 1, /* levels_for_compare */
9359 &my_charset_utf16_handler,
9360 &my_collation_utf16_uca_handler,
9361 PAD_SPACE};
9362
/**
  utf16 collation "utf16_german2_ci", number 121.
  Carries the `german2` tailoring string; the uca pointer is nullptr in
  this initializer. levels_for_compare is 1 and the pad attribute is
  PAD_SPACE.
*/
9363 CHARSET_INFO my_charset_utf16_german2_uca_ci = {
9364 121,
9365 0,
9366 0, /* number */
9367 MY_CS_UTF16_UCA_FLAGS, /* state */
9368 "utf16", /* csname */
9369 "utf16_german2_ci", /* m_coll_name */
9370 "", /* comment */
9371 german2, /* tailoring */
9372 nullptr, /* coll_param */
9373 nullptr, /* ctype */
9374 nullptr, /* to_lower */
9375 nullptr, /* to_upper */
9376 nullptr, /* sort_order */
9377 nullptr, /* uca */
9378 nullptr, /* tab_to_uni */
9379 nullptr, /* tab_from_uni */
9380 &my_unicase_default, /* caseinfo */
9381 nullptr, /* state_map */
9382 nullptr, /* ident_map */
9383 8, /* strxfrm_multiply */
9384 1, /* caseup_multiply */
9385 1, /* casedn_multiply */
9386 2, /* mbminlen */
9387 4, /* mbmaxlen */
9388 1, /* mbmaxlenlen */
9389 9, /* min_sort_char */
9390 0xFFFF, /* max_sort_char */
9391 ' ', /* pad char */
9392 false, /* escape_with_backslash_is_dangerous */
9393 1, /* levels_for_compare */
9394 &my_charset_utf16_handler,
9395 &my_collation_utf16_uca_handler,
9396 PAD_SPACE};
9397
/**
  utf16 collation "utf16_croatian_ci", number 122.
  Carries the `croatian` tailoring string; the uca pointer is nullptr in
  this initializer. levels_for_compare is 1 and the pad attribute is
  PAD_SPACE.
*/
9398 CHARSET_INFO my_charset_utf16_croatian_uca_ci = {
9399 122,
9400 0,
9401 0, /* number */
9402 MY_CS_UTF16_UCA_FLAGS, /* state */
9403 "utf16", /* csname */
9404 "utf16_croatian_ci", /* m_coll_name */
9405 "", /* comment */
9406 croatian, /* tailoring */
9407 nullptr, /* coll_param */
9408 nullptr, /* ctype */
9409 nullptr, /* to_lower */
9410 nullptr, /* to_upper */
9411 nullptr, /* sort_order */
9412 nullptr, /* uca */
9413 nullptr, /* tab_to_uni */
9414 nullptr, /* tab_from_uni */
9415 &my_unicase_default, /* caseinfo */
9416 nullptr, /* state_map */
9417 nullptr, /* ident_map */
9418 8, /* strxfrm_multiply */
9419 1, /* caseup_multiply */
9420 1, /* casedn_multiply */
9421 2, /* mbminlen */
9422 4, /* mbmaxlen */
9423 1, /* mbmaxlenlen */
9424 9, /* min_sort_char */
9425 0xFFFF, /* max_sort_char */
9426 ' ', /* pad char */
9427 false, /* escape_with_backslash_is_dangerous */
9428 1, /* levels_for_compare */
9429 &my_charset_utf16_handler,
9430 &my_collation_utf16_uca_handler,
9431 PAD_SPACE};
9432
/**
  utf16 collation "utf16_unicode_520_ci", number 123.
  Differs from the neighbouring utf16 definitions in this file: the state
  flags are spelled out instead of using MY_CS_UTF16_UCA_FLAGS, the
  tailoring string is empty, the uca pointer is set directly to
  &my_uca_v520, caseinfo is &my_unicase_unicode520, max_sort_char is
  0x10FFFF and the pad char is written as 0x20. Pad attribute is PAD_SPACE.
*/
9433 CHARSET_INFO my_charset_utf16_unicode_520_ci = {
9434 123,
9435 0,
9436 0, /* number */
9437 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* state */
9438 "utf16", /* csname */
9439 "utf16_unicode_520_ci", /* m_coll_name */
9440 "", /* comment */
9441 "", /* tailoring */
9442 nullptr, /* coll_param */
9443 nullptr, /* ctype */
9444 nullptr, /* to_lower */
9445 nullptr, /* to_upper */
9446 nullptr, /* sort_order */
9447 &my_uca_v520, /* uca */
9448 nullptr, /* tab_to_uni */
9449 nullptr, /* tab_from_uni */
9450 &my_unicase_unicode520, /* caseinfo */
9451 nullptr, /* state_map */
9452 nullptr, /* ident_map */
9453 8, /* strxfrm_multiply */
9454 1, /* caseup_multiply */
9455 1, /* casedn_multiply */
9456 2, /* mbminlen */
9457 4, /* mbmaxlen */
9458 1, /* mbmaxlenlen */
9459 9, /* min_sort_char */
9460 0x10FFFF, /* max_sort_char */
9461 0x20, /* pad char */
9462 false, /* escape_with_backslash_is_dangerous */
9463 1, /* levels_for_compare */
9464 &my_charset_utf16_handler,
9465 &my_collation_utf16_uca_handler,
9466 PAD_SPACE};
9467
/**
  utf16 collation "utf16_vietnamese_ci", number 124.
  Carries the `vietnamese` tailoring string; the uca pointer is nullptr in
  this initializer. levels_for_compare is 1 and the pad attribute is
  PAD_SPACE. Note that the variable name has no "_uca" infix, unlike the
  other tailored utf16 definitions nearby.
*/
9468 CHARSET_INFO my_charset_utf16_vietnamese_ci = {
9469 124,
9470 0,
9471 0, /* number */
9472 MY_CS_UTF16_UCA_FLAGS, /* state */
9473 "utf16", /* csname */
9474 "utf16_vietnamese_ci", /* m_coll_name */
9475 "", /* comment */
9476 vietnamese, /* tailoring */
9477 nullptr, /* coll_param */
9478 nullptr, /* ctype */
9479 nullptr, /* to_lower */
9480 nullptr, /* to_upper */
9481 nullptr, /* sort_order */
9482 nullptr, /* uca */
9483 nullptr, /* tab_to_uni */
9484 nullptr, /* tab_from_uni */
9485 &my_unicase_default, /* caseinfo */
9486 nullptr, /* state_map */
9487 nullptr, /* ident_map */
9488 8, /* strxfrm_multiply */
9489 1, /* caseup_multiply */
9490 1, /* casedn_multiply */
9491 2, /* mbminlen */
9492 4, /* mbmaxlen */
9493 1, /* mbmaxlenlen */
9494 9, /* min_sort_char */
9495 0xFFFF, /* max_sort_char */
9496 ' ', /* pad char */
9497 false, /* escape_with_backslash_is_dangerous */
9498 1, /* levels_for_compare */
9499 &my_charset_utf16_handler,
9500 &my_collation_utf16_uca_handler,
9501 PAD_SPACE};
9502
/**
  Collation handler table referenced by my_charset_gb18030_unicode_520_ci.
  Apart from one nullptr slot, every entry is one of the my_*_uca,
  my_*_any_uca or my_*_mb routines, plus my_strnxfrmlen_simple and
  my_propagate_complex.
  NOTE(review): the slot names come from MY_COLLATION_HANDLER, which is not
  visible here; only the first slot is labelled in this initializer.
*/
9503 MY_COLLATION_HANDLER my_collation_gb18030_uca_handler = {
9504 my_coll_init_uca, /* init */
9505 my_coll_uninit_uca,
9506 my_strnncoll_any_uca,
9507 my_strnncollsp_any_uca,
9508 my_strnxfrm_any_uca,
9509 my_strnxfrmlen_simple,
9510 my_like_range_mb,
9511 my_wildcmp_uca,
9512 nullptr,
9513 my_instr_mb,
9514 my_hash_sort_any_uca,
9515 my_propagate_complex};
9516
9517 /**
9518 The array used as the "type of character" bit mask for each byte
9519 value. ctype[0] is reserved for EOF (-1), so the entry for a byte c is
9520 ctype[c + 1]. Also refer to strings/CHARSET_INFO.txt.
  The table holds 1 + 16 * 16 = 257 entries; the comment at the end of each
  row gives the byte range that row covers.
  NOTE(review): the values look like the _MY_U/_MY_L/... bit flags from
  m_ctype.h (e.g. 132 for '0'-'9', 129 for 'A'-'F') - confirm against that
  header.
9521 */
9522 static const uchar ctype_gb18030[257] = {
9523 0, /* For standard library */
9524 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, /* 0x00-0x0F */
9525 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, /* 0x10-0x1F */
9526 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 0x20-0x2F */
9527 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16, /* 0x30-0x3F */
9528 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
9529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, /* 0x50-0x5F */
9530 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x60-0x6F */
9531 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, /* 0x70-0x7F */
9532 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x80-0x8F */
9533 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x90-0x9F */
9534 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xA0-0xAF */
9535 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xB0-0xBF */
9536 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xC0-0xCF */
9537 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xD0-0xDF */
9538 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0-0xEF */
9539 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0}; /* 0xF0-0xFF */
9540
/* Charset handler for gb18030; declared here, defined outside this view. */
9541 extern MY_CHARSET_HANDLER my_charset_gb18030_uca_handler;
9542
/**
  gb18030 collation "gb18030_unicode_520_ci", number 250.
  Points at my_uca_v520 with an empty tailoring string and uses the
  ctype_gb18030 table. Unlike the utf16/utf8mb4 definitions in this file,
  caseup_multiply and casedn_multiply are 2, mbmaxlenlen is 2,
  min_sort_char is 0 and max_sort_char is 0xE3329A35. Pad attribute is
  PAD_SPACE.
*/
9543 CHARSET_INFO my_charset_gb18030_unicode_520_ci = {
9544 250,
9545 0,
9546 0, /* number */
9547 MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_NONASCII, /* state */
9548 "gb18030", /* csname */
9549 "gb18030_unicode_520_ci", /* m_coll_name */
9550 "China National Standard GB18030", /* comment */
9551 "", /* tailoring */
9552 nullptr, /* coll_param */
9553 ctype_gb18030, /* ctype */
9554 nullptr, /* to_lower */
9555 nullptr, /* to_upper */
9556 nullptr, /* sort_order */
9557 &my_uca_v520, /* uca */
9558 nullptr, /* tab_to_uni */
9559 nullptr, /* tab_from_uni */
9560 &my_unicase_unicode520, /* caseinfo */
9561 nullptr, /* state_map */
9562 nullptr, /* ident_map */
9563 8, /* strxfrm_multiply */
9564 2, /* caseup_multiply */
9565 2, /* casedn_multiply */
9566 1, /* mbminlen */
9567 4, /* mbmaxlen */
9568 2, /* mbmaxlenlen */
9569 0, /* min_sort_char */
9570 0xE3329A35, /* max_sort_char */
9571 ' ', /* pad char */
9572 false, /* escape_with_backslash_is_dangerous */
9573 1, /* levels_for_compare */
9574 &my_charset_gb18030_uca_handler,
9575 &my_collation_gb18030_uca_handler,
9576 PAD_SPACE};
9577
/**
  utf8mb4 collation MY_UTF8MB4 "_0900_ai_ci", number 255.
  The only definition in this part of the file whose state includes
  MY_CS_PRIMARY. Points at my_uca_v900 with no tailoring (nullptr) and
  my_collation_uca_900_handler; strxfrm_multiply is 0 and the pad
  attribute is NO_PAD.
*/
9578 CHARSET_INFO my_charset_utf8mb4_0900_ai_ci = {
9579 255,
9580 0,
9581 0, /* number */
9582 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_PRIMARY, /* state */
9583 MY_UTF8MB4, /* csname */
9584 MY_UTF8MB4 "_0900_ai_ci", /* m_coll_name */
9585 "UTF-8 Unicode", /* comment */
9586 nullptr, /* tailoring */
9587 nullptr, /* coll_param */
9588 ctype_utf8, /* ctype */
9589 nullptr, /* to_lower */
9590 nullptr, /* to_upper */
9591 nullptr, /* sort_order */
9592 &my_uca_v900, /* uca */
9593 nullptr, /* tab_to_uni */
9594 nullptr, /* tab_from_uni */
9595 &my_unicase_unicode900, /* caseinfo */
9596 nullptr, /* state_map */
9597 nullptr, /* ident_map */
9598 0, /* strxfrm_multiply */
9599 1, /* caseup_multiply */
9600 1, /* casedn_multiply */
9601 1, /* mbminlen */
9602 4, /* mbmaxlen */
9603 1, /* mbmaxlenlen */
9604 9, /* min_sort_char */
9605 0x10FFFF, /* max_sort_char */
9606 ' ', /* pad char */
9607 false, /* escape_with_backslash_is_dangerous */
9608 1, /* levels_for_compare */
9609 &my_charset_utf8mb4_handler,
9610 &my_collation_uca_900_handler,
9611 NO_PAD};
9612
/**
  utf8mb4 collation MY_UTF8MB4 "_de_pb_0900_ai_ci", number 256.
  Points at my_uca_v900 with the `de_pb_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9613 CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci = {
9614 256,
9615 0,
9616 0, /* number */
9617 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9618 MY_UTF8MB4, /* csname */
9619 MY_UTF8MB4 "_de_pb_0900_ai_ci", /* m_coll_name */
9620 "", /* comment */
9621 de_pb_cldr_30, /* tailoring */
9622 nullptr, /* coll_param */
9623 ctype_utf8, /* ctype */
9624 nullptr, /* to_lower */
9625 nullptr, /* to_upper */
9626 nullptr, /* sort_order */
9627 &my_uca_v900, /* uca */
9628 nullptr, /* tab_to_uni */
9629 nullptr, /* tab_from_uni */
9630 &my_unicase_unicode900, /* caseinfo */
9631 nullptr, /* state_map */
9632 nullptr, /* ident_map */
9633 0, /* strxfrm_multiply */
9634 1, /* caseup_multiply */
9635 1, /* casedn_multiply */
9636 1, /* mbminlen */
9637 4, /* mbmaxlen */
9638 1, /* mbmaxlenlen */
9639 9, /* min_sort_char */
9640 0x10FFFF, /* max_sort_char */
9641 ' ', /* pad char */
9642 false, /* escape_with_backslash_is_dangerous */
9643 1, /* levels_for_compare */
9644 &my_charset_utf8mb4_handler,
9645 &my_collation_uca_900_handler,
9646 NO_PAD};
9647
/**
  utf8mb4 collation MY_UTF8MB4 "_is_0900_ai_ci", number 257.
  Points at my_uca_v900 with the `is_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9648 CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci = {
9649 257,
9650 0,
9651 0, /* number */
9652 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9653 MY_UTF8MB4, /* csname */
9654 MY_UTF8MB4 "_is_0900_ai_ci", /* m_coll_name */
9655 "", /* comment */
9656 is_cldr_30, /* tailoring */
9657 nullptr, /* coll_param */
9658 ctype_utf8, /* ctype */
9659 nullptr, /* to_lower */
9660 nullptr, /* to_upper */
9661 nullptr, /* sort_order */
9662 &my_uca_v900, /* uca */
9663 nullptr, /* tab_to_uni */
9664 nullptr, /* tab_from_uni */
9665 &my_unicase_unicode900, /* caseinfo */
9666 nullptr, /* state_map */
9667 nullptr, /* ident_map */
9668 0, /* strxfrm_multiply */
9669 1, /* caseup_multiply */
9670 1, /* casedn_multiply */
9671 1, /* mbminlen */
9672 4, /* mbmaxlen */
9673 1, /* mbmaxlenlen */
9674 9, /* min_sort_char */
9675 0x10FFFF, /* max_sort_char */
9676 ' ', /* pad char */
9677 false, /* escape_with_backslash_is_dangerous */
9678 1, /* levels_for_compare */
9679 &my_charset_utf8mb4_handler,
9680 &my_collation_uca_900_handler,
9681 NO_PAD};
9682
/**
  utf8mb4 collation MY_UTF8MB4 "_lv_0900_ai_ci", number 258.
  Points at my_uca_v900 with the `lv_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9683 CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci = {
9684 258,
9685 0,
9686 0, /* number */
9687 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9688 MY_UTF8MB4, /* csname */
9689 MY_UTF8MB4 "_lv_0900_ai_ci", /* m_coll_name */
9690 "", /* comment */
9691 lv_cldr_30, /* tailoring */
9692 nullptr, /* coll_param */
9693 ctype_utf8, /* ctype */
9694 nullptr, /* to_lower */
9695 nullptr, /* to_upper */
9696 nullptr, /* sort_order */
9697 &my_uca_v900, /* uca */
9698 nullptr, /* tab_to_uni */
9699 nullptr, /* tab_from_uni */
9700 &my_unicase_unicode900, /* caseinfo */
9701 nullptr, /* state_map */
9702 nullptr, /* ident_map */
9703 0, /* strxfrm_multiply */
9704 1, /* caseup_multiply */
9705 1, /* casedn_multiply */
9706 1, /* mbminlen */
9707 4, /* mbmaxlen */
9708 1, /* mbmaxlenlen */
9709 9, /* min_sort_char */
9710 0x10FFFF, /* max_sort_char */
9711 ' ', /* pad char */
9712 false, /* escape_with_backslash_is_dangerous */
9713 1, /* levels_for_compare */
9714 &my_charset_utf8mb4_handler,
9715 &my_collation_uca_900_handler,
9716 NO_PAD};
9717
/**
  utf8mb4 collation MY_UTF8MB4 "_ro_0900_ai_ci", number 259.
  Points at my_uca_v900 with the `ro_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9718 CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci = {
9719 259,
9720 0,
9721 0, /* number */
9722 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9723 MY_UTF8MB4, /* csname */
9724 MY_UTF8MB4 "_ro_0900_ai_ci", /* m_coll_name */
9725 "", /* comment */
9726 ro_cldr_30, /* tailoring */
9727 nullptr, /* coll_param */
9728 ctype_utf8, /* ctype */
9729 nullptr, /* to_lower */
9730 nullptr, /* to_upper */
9731 nullptr, /* sort_order */
9732 &my_uca_v900, /* uca */
9733 nullptr, /* tab_to_uni */
9734 nullptr, /* tab_from_uni */
9735 &my_unicase_unicode900, /* caseinfo */
9736 nullptr, /* state_map */
9737 nullptr, /* ident_map */
9738 0, /* strxfrm_multiply */
9739 1, /* caseup_multiply */
9740 1, /* casedn_multiply */
9741 1, /* mbminlen */
9742 4, /* mbmaxlen */
9743 1, /* mbmaxlenlen */
9744 9, /* min_sort_char */
9745 0x10FFFF, /* max_sort_char */
9746 ' ', /* pad char */
9747 false, /* escape_with_backslash_is_dangerous */
9748 1, /* levels_for_compare */
9749 &my_charset_utf8mb4_handler,
9750 &my_collation_uca_900_handler,
9751 NO_PAD};
9752
/**
  utf8mb4 collation MY_UTF8MB4 "_sl_0900_ai_ci", number 260.
  Points at my_uca_v900 with the `sl_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9753 CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci = {
9754 260,
9755 0,
9756 0, /* number */
9757 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9758 MY_UTF8MB4, /* csname */
9759 MY_UTF8MB4 "_sl_0900_ai_ci", /* m_coll_name */
9760 "", /* comment */
9761 sl_cldr_30, /* tailoring */
9762 nullptr, /* coll_param */
9763 ctype_utf8, /* ctype */
9764 nullptr, /* to_lower */
9765 nullptr, /* to_upper */
9766 nullptr, /* sort_order */
9767 &my_uca_v900, /* uca */
9768 nullptr, /* tab_to_uni */
9769 nullptr, /* tab_from_uni */
9770 &my_unicase_unicode900, /* caseinfo */
9771 nullptr, /* state_map */
9772 nullptr, /* ident_map */
9773 0, /* strxfrm_multiply */
9774 1, /* caseup_multiply */
9775 1, /* casedn_multiply */
9776 1, /* mbminlen */
9777 4, /* mbmaxlen */
9778 1, /* mbmaxlenlen */
9779 9, /* min_sort_char */
9780 0x10FFFF, /* max_sort_char */
9781 ' ', /* pad char */
9782 false, /* escape_with_backslash_is_dangerous */
9783 1, /* levels_for_compare */
9784 &my_charset_utf8mb4_handler,
9785 &my_collation_uca_900_handler,
9786 NO_PAD};
9787
/**
  utf8mb4 collation MY_UTF8MB4 "_pl_0900_ai_ci", number 261.
  Points at my_uca_v900 with the `pl_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9788 CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci = {
9789 261,
9790 0,
9791 0, /* number */
9792 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9793 MY_UTF8MB4, /* csname */
9794 MY_UTF8MB4 "_pl_0900_ai_ci", /* m_coll_name */
9795 "", /* comment */
9796 pl_cldr_30, /* tailoring */
9797 nullptr, /* coll_param */
9798 ctype_utf8, /* ctype */
9799 nullptr, /* to_lower */
9800 nullptr, /* to_upper */
9801 nullptr, /* sort_order */
9802 &my_uca_v900, /* uca */
9803 nullptr, /* tab_to_uni */
9804 nullptr, /* tab_from_uni */
9805 &my_unicase_unicode900, /* caseinfo */
9806 nullptr, /* state_map */
9807 nullptr, /* ident_map */
9808 0, /* strxfrm_multiply */
9809 1, /* caseup_multiply */
9810 1, /* casedn_multiply */
9811 1, /* mbminlen */
9812 4, /* mbmaxlen */
9813 1, /* mbmaxlenlen */
9814 9, /* min_sort_char */
9815 0x10FFFF, /* max_sort_char */
9816 ' ', /* pad char */
9817 false, /* escape_with_backslash_is_dangerous */
9818 1, /* levels_for_compare */
9819 &my_charset_utf8mb4_handler,
9820 &my_collation_uca_900_handler,
9821 NO_PAD};
9822
/**
  utf8mb4 collation MY_UTF8MB4 "_et_0900_ai_ci", number 262.
  Points at my_uca_v900 with the `et_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9823 CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci = {
9824 262,
9825 0,
9826 0, /* number */
9827 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9828 MY_UTF8MB4, /* csname */
9829 MY_UTF8MB4 "_et_0900_ai_ci", /* m_coll_name */
9830 "", /* comment */
9831 et_cldr_30, /* tailoring */
9832 nullptr, /* coll_param */
9833 ctype_utf8, /* ctype */
9834 nullptr, /* to_lower */
9835 nullptr, /* to_upper */
9836 nullptr, /* sort_order */
9837 &my_uca_v900, /* uca */
9838 nullptr, /* tab_to_uni */
9839 nullptr, /* tab_from_uni */
9840 &my_unicase_unicode900, /* caseinfo */
9841 nullptr, /* state_map */
9842 nullptr, /* ident_map */
9843 0, /* strxfrm_multiply */
9844 1, /* caseup_multiply */
9845 1, /* casedn_multiply */
9846 1, /* mbminlen */
9847 4, /* mbmaxlen */
9848 1, /* mbmaxlenlen */
9849 9, /* min_sort_char */
9850 0x10FFFF, /* max_sort_char */
9851 ' ', /* pad char */
9852 false, /* escape_with_backslash_is_dangerous */
9853 1, /* levels_for_compare */
9854 &my_charset_utf8mb4_handler,
9855 &my_collation_uca_900_handler,
9856 NO_PAD};
9857
/**
  utf8mb4 collation MY_UTF8MB4 "_es_0900_ai_ci", number 263.
  Points at my_uca_v900 with the `spanish` tailoring string (no "_cldr_30"
  suffix, unlike most 0900 definitions nearby) and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9858 CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci = {
9859 263,
9860 0,
9861 0, /* number */
9862 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9863 MY_UTF8MB4, /* csname */
9864 MY_UTF8MB4 "_es_0900_ai_ci", /* m_coll_name */
9865 "", /* comment */
9866 spanish, /* tailoring */
9867 nullptr, /* coll_param */
9868 ctype_utf8, /* ctype */
9869 nullptr, /* to_lower */
9870 nullptr, /* to_upper */
9871 nullptr, /* sort_order */
9872 &my_uca_v900, /* uca */
9873 nullptr, /* tab_to_uni */
9874 nullptr, /* tab_from_uni */
9875 &my_unicase_unicode900, /* caseinfo */
9876 nullptr, /* state_map */
9877 nullptr, /* ident_map */
9878 0, /* strxfrm_multiply */
9879 1, /* caseup_multiply */
9880 1, /* casedn_multiply */
9881 1, /* mbminlen */
9882 4, /* mbmaxlen */
9883 1, /* mbmaxlenlen */
9884 9, /* min_sort_char */
9885 0x10FFFF, /* max_sort_char */
9886 ' ', /* pad char */
9887 false, /* escape_with_backslash_is_dangerous */
9888 1, /* levels_for_compare */
9889 &my_charset_utf8mb4_handler,
9890 &my_collation_uca_900_handler,
9891 NO_PAD};
9892
/**
  utf8mb4 collation MY_UTF8MB4 "_sv_0900_ai_ci", number 264.
  Points at my_uca_v900 with the `sv_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9893 CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci = {
9894 264,
9895 0,
9896 0, /* number */
9897 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9898 MY_UTF8MB4, /* csname */
9899 MY_UTF8MB4 "_sv_0900_ai_ci", /* m_coll_name */
9900 "", /* comment */
9901 sv_cldr_30, /* tailoring */
9902 nullptr, /* coll_param */
9903 ctype_utf8, /* ctype */
9904 nullptr, /* to_lower */
9905 nullptr, /* to_upper */
9906 nullptr, /* sort_order */
9907 &my_uca_v900, /* uca */
9908 nullptr, /* tab_to_uni */
9909 nullptr, /* tab_from_uni */
9910 &my_unicase_unicode900, /* caseinfo */
9911 nullptr, /* state_map */
9912 nullptr, /* ident_map */
9913 0, /* strxfrm_multiply */
9914 1, /* caseup_multiply */
9915 1, /* casedn_multiply */
9916 1, /* mbminlen */
9917 4, /* mbmaxlen */
9918 1, /* mbmaxlenlen */
9919 9, /* min_sort_char */
9920 0x10FFFF, /* max_sort_char */
9921 ' ', /* pad char */
9922 false, /* escape_with_backslash_is_dangerous */
9923 1, /* levels_for_compare */
9924 &my_charset_utf8mb4_handler,
9925 &my_collation_uca_900_handler,
9926 NO_PAD};
9927
/**
  utf8mb4 collation MY_UTF8MB4 "_tr_0900_ai_ci", number 265.
  Points at my_uca_v900 with the `tr_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9928 CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci = {
9929 265,
9930 0,
9931 0, /* number */
9932 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9933 MY_UTF8MB4, /* csname */
9934 MY_UTF8MB4 "_tr_0900_ai_ci", /* m_coll_name */
9935 "", /* comment */
9936 tr_cldr_30, /* tailoring */
9937 nullptr, /* coll_param */
9938 ctype_utf8, /* ctype */
9939 nullptr, /* to_lower */
9940 nullptr, /* to_upper */
9941 nullptr, /* sort_order */
9942 &my_uca_v900, /* uca */
9943 nullptr, /* tab_to_uni */
9944 nullptr, /* tab_from_uni */
9945 &my_unicase_unicode900, /* caseinfo */
9946 nullptr, /* state_map */
9947 nullptr, /* ident_map */
9948 0, /* strxfrm_multiply */
9949 1, /* caseup_multiply */
9950 1, /* casedn_multiply */
9951 1, /* mbminlen */
9952 4, /* mbmaxlen */
9953 1, /* mbmaxlenlen */
9954 9, /* min_sort_char */
9955 0x10FFFF, /* max_sort_char */
9956 ' ', /* pad char */
9957 false, /* escape_with_backslash_is_dangerous */
9958 1, /* levels_for_compare */
9959 &my_charset_utf8mb4_handler,
9960 &my_collation_uca_900_handler,
9961 NO_PAD};
9962
/**
  utf8mb4 collation MY_UTF8MB4 "_cs_0900_ai_ci", number 266.
  Points at my_uca_v900 with the `cs_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9963 CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci = {
9964 266,
9965 0,
9966 0, /* number */
9967 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
9968 MY_UTF8MB4, /* csname */
9969 MY_UTF8MB4 "_cs_0900_ai_ci", /* m_coll_name */
9970 "", /* comment */
9971 cs_cldr_30, /* tailoring */
9972 nullptr, /* coll_param */
9973 ctype_utf8, /* ctype */
9974 nullptr, /* to_lower */
9975 nullptr, /* to_upper */
9976 nullptr, /* sort_order */
9977 &my_uca_v900, /* uca */
9978 nullptr, /* tab_to_uni */
9979 nullptr, /* tab_from_uni */
9980 &my_unicase_unicode900, /* caseinfo */
9981 nullptr, /* state_map */
9982 nullptr, /* ident_map */
9983 0, /* strxfrm_multiply */
9984 1, /* caseup_multiply */
9985 1, /* casedn_multiply */
9986 1, /* mbminlen */
9987 4, /* mbmaxlen */
9988 1, /* mbmaxlenlen */
9989 9, /* min_sort_char */
9990 0x10FFFF, /* max_sort_char */
9991 ' ', /* pad char */
9992 false, /* escape_with_backslash_is_dangerous */
9993 1, /* levels_for_compare */
9994 &my_charset_utf8mb4_handler,
9995 &my_collation_uca_900_handler,
9996 NO_PAD};
9997
/**
  utf8mb4 collation MY_UTF8MB4 "_da_0900_ai_ci", number 267.
  Points at my_uca_v900 with the `da_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
9998 CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci = {
9999 267,
10000 0,
10001 0, /* number */
10002 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10003 MY_UTF8MB4, /* csname */
10004 MY_UTF8MB4 "_da_0900_ai_ci", /* m_coll_name */
10005 "", /* comment */
10006 da_cldr_30, /* tailoring */
10007 nullptr, /* coll_param */
10008 ctype_utf8, /* ctype */
10009 nullptr, /* to_lower */
10010 nullptr, /* to_upper */
10011 nullptr, /* sort_order */
10012 &my_uca_v900, /* uca */
10013 nullptr, /* tab_to_uni */
10014 nullptr, /* tab_from_uni */
10015 &my_unicase_unicode900, /* caseinfo */
10016 nullptr, /* state_map */
10017 nullptr, /* ident_map */
10018 0, /* strxfrm_multiply */
10019 1, /* caseup_multiply */
10020 1, /* casedn_multiply */
10021 1, /* mbminlen */
10022 4, /* mbmaxlen */
10023 1, /* mbmaxlenlen */
10024 9, /* min_sort_char */
10025 0x10FFFF, /* max_sort_char */
10026 ' ', /* pad char */
10027 false, /* escape_with_backslash_is_dangerous */
10028 1, /* levels_for_compare */
10029 &my_charset_utf8mb4_handler,
10030 &my_collation_uca_900_handler,
10031 NO_PAD};
10032
/**
  utf8mb4 collation MY_UTF8MB4 "_lt_0900_ai_ci", number 268.
  Points at my_uca_v900 with the `lt_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10033 CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci = {
10034 268,
10035 0,
10036 0, /* number */
10037 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10038 MY_UTF8MB4, /* csname */
10039 MY_UTF8MB4 "_lt_0900_ai_ci", /* m_coll_name */
10040 "", /* comment */
10041 lt_cldr_30, /* tailoring */
10042 nullptr, /* coll_param */
10043 ctype_utf8, /* ctype */
10044 nullptr, /* to_lower */
10045 nullptr, /* to_upper */
10046 nullptr, /* sort_order */
10047 &my_uca_v900, /* uca */
10048 nullptr, /* tab_to_uni */
10049 nullptr, /* tab_from_uni */
10050 &my_unicase_unicode900, /* caseinfo */
10051 nullptr, /* state_map */
10052 nullptr, /* ident_map */
10053 0, /* strxfrm_multiply */
10054 1, /* caseup_multiply */
10055 1, /* casedn_multiply */
10056 1, /* mbminlen */
10057 4, /* mbmaxlen */
10058 1, /* mbmaxlenlen */
10059 9, /* min_sort_char */
10060 0x10FFFF, /* max_sort_char */
10061 ' ', /* pad char */
10062 false, /* escape_with_backslash_is_dangerous */
10063 1, /* levels_for_compare */
10064 &my_charset_utf8mb4_handler,
10065 &my_collation_uca_900_handler,
10066 NO_PAD};
10067
/**
  utf8mb4 collation MY_UTF8MB4 "_sk_0900_ai_ci", number 269.
  Points at my_uca_v900 with the `sk_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10068 CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci = {
10069 269,
10070 0,
10071 0, /* number */
10072 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10073 MY_UTF8MB4, /* csname */
10074 MY_UTF8MB4 "_sk_0900_ai_ci", /* m_coll_name */
10075 "", /* comment */
10076 sk_cldr_30, /* tailoring */
10077 nullptr, /* coll_param */
10078 ctype_utf8, /* ctype */
10079 nullptr, /* to_lower */
10080 nullptr, /* to_upper */
10081 nullptr, /* sort_order */
10082 &my_uca_v900, /* uca */
10083 nullptr, /* tab_to_uni */
10084 nullptr, /* tab_from_uni */
10085 &my_unicase_unicode900, /* caseinfo */
10086 nullptr, /* state_map */
10087 nullptr, /* ident_map */
10088 0, /* strxfrm_multiply */
10089 1, /* caseup_multiply */
10090 1, /* casedn_multiply */
10091 1, /* mbminlen */
10092 4, /* mbmaxlen */
10093 1, /* mbmaxlenlen */
10094 9, /* min_sort_char */
10095 0x10FFFF, /* max_sort_char */
10096 ' ', /* pad char */
10097 false, /* escape_with_backslash_is_dangerous */
10098 1, /* levels_for_compare */
10099 &my_charset_utf8mb4_handler,
10100 &my_collation_uca_900_handler,
10101 NO_PAD};
10102
/**
  utf8mb4 collation MY_UTF8MB4 "_es_trad_0900_ai_ci", number 270.
  Points at my_uca_v900 with the `es_trad_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10103 CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci = {
10104 270,
10105 0,
10106 0, /* number */
10107 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10108 MY_UTF8MB4, /* csname */
10109 MY_UTF8MB4 "_es_trad_0900_ai_ci", /* m_coll_name */
10110 "", /* comment */
10111 es_trad_cldr_30, /* tailoring */
10112 nullptr, /* coll_param */
10113 ctype_utf8, /* ctype */
10114 nullptr, /* to_lower */
10115 nullptr, /* to_upper */
10116 nullptr, /* sort_order */
10117 &my_uca_v900, /* uca */
10118 nullptr, /* tab_to_uni */
10119 nullptr, /* tab_from_uni */
10120 &my_unicase_unicode900, /* caseinfo */
10121 nullptr, /* state_map */
10122 nullptr, /* ident_map */
10123 0, /* strxfrm_multiply */
10124 1, /* caseup_multiply */
10125 1, /* casedn_multiply */
10126 1, /* mbminlen */
10127 4, /* mbmaxlen */
10128 1, /* mbmaxlenlen */
10129 9, /* min_sort_char */
10130 0x10FFFF, /* max_sort_char */
10131 ' ', /* pad char */
10132 false, /* escape_with_backslash_is_dangerous */
10133 1, /* levels_for_compare */
10134 &my_charset_utf8mb4_handler,
10135 &my_collation_uca_900_handler,
10136 NO_PAD};
10137
/**
  utf8mb4 collation MY_UTF8MB4 "_la_0900_ai_ci", number 271.
  Points at my_uca_v900 with the `roman` tailoring string (no "_cldr_30"
  suffix, unlike most 0900 definitions nearby) and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10138 CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci = {
10139 271,
10140 0,
10141 0, /* number */
10142 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10143 MY_UTF8MB4, /* csname */
10144 MY_UTF8MB4 "_la_0900_ai_ci", /* m_coll_name */
10145 "", /* comment */
10146 roman, /* tailoring */
10147 nullptr, /* coll_param */
10148 ctype_utf8, /* ctype */
10149 nullptr, /* to_lower */
10150 nullptr, /* to_upper */
10151 nullptr, /* sort_order */
10152 &my_uca_v900, /* uca */
10153 nullptr, /* tab_to_uni */
10154 nullptr, /* tab_from_uni */
10155 &my_unicase_unicode900, /* caseinfo */
10156 nullptr, /* state_map */
10157 nullptr, /* ident_map */
10158 0, /* strxfrm_multiply */
10159 1, /* caseup_multiply */
10160 1, /* casedn_multiply */
10161 1, /* mbminlen */
10162 4, /* mbmaxlen */
10163 1, /* mbmaxlenlen */
10164 9, /* min_sort_char */
10165 0x10FFFF, /* max_sort_char */
10166 ' ', /* pad char */
10167 false, /* escape_with_backslash_is_dangerous */
10168 1, /* levels_for_compare */
10169 &my_charset_utf8mb4_handler,
10170 &my_collation_uca_900_handler,
10171 NO_PAD};
10172
/*
  Compiled out: utf8mb4 collation MY_UTF8MB4 "_fa_0900_ai_ci", number 272,
  with the `fa_cldr_30` tailoring string and a non-null coll_param
  (&fa_coll_param).
  NOTE(review): the reason this definition is disabled is not visible here.
  It still uses NULL and 0 where the active definitions use nullptr and
  false; update those if it is ever re-enabled.
*/
10173 #if 0
10174 CHARSET_INFO my_charset_utf8mb4_fa_0900_ai_ci=
10175 {
10176 272, 0, 0, /* number */
10177 MY_CS_UTF8MB4_UCA_FLAGS,/* state */
10178 MY_UTF8MB4, /* csname */
10179 MY_UTF8MB4 "_fa_0900_ai_ci",/* m_coll_name */
10180 "", /* comment */
10181 fa_cldr_30, /* tailoring */
10182 &fa_coll_param, /* coll_param */
10183 ctype_utf8, /* ctype */
10184 NULL, /* to_lower */
10185 NULL, /* to_upper */
10186 NULL, /* sort_order */
10187 &my_uca_v900, /* uca */
10188 NULL, /* tab_to_uni */
10189 NULL, /* tab_from_uni */
10190 &my_unicase_unicode900,/* caseinfo */
10191 NULL, /* state_map */
10192 NULL, /* ident_map */
10193 0, /* strxfrm_multiply */
10194 1, /* caseup_multiply */
10195 1, /* casedn_multiply */
10196 1, /* mbminlen */
10197 4, /* mbmaxlen */
10198 1, /* mbmaxlenlen */
10199 9, /* min_sort_char */
10200 0x10FFFF, /* max_sort_char */
10201 ' ', /* pad char */
10202 0, /* escape_with_backslash_is_dangerous */
10203 1, /* levels_for_compare */
10204 &my_charset_utf8mb4_handler,
10205 &my_collation_uca_900_handler,
10206 NO_PAD
10207 };
10208 #endif
10209
/**
  utf8mb4 collation MY_UTF8MB4 "_eo_0900_ai_ci", number 273.
  Points at my_uca_v900 with the `esperanto` tailoring string, the same
  identifier used by my_charset_utf16_esperanto_uca_ci, and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10210 CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci = {
10211 273,
10212 0,
10213 0, /* number */
10214 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10215 MY_UTF8MB4, /* csname */
10216 MY_UTF8MB4 "_eo_0900_ai_ci", /* m_coll_name */
10217 "", /* comment */
10218 esperanto, /* tailoring */
10219 nullptr, /* coll_param */
10220 ctype_utf8, /* ctype */
10221 nullptr, /* to_lower */
10222 nullptr, /* to_upper */
10223 nullptr, /* sort_order */
10224 &my_uca_v900, /* uca */
10225 nullptr, /* tab_to_uni */
10226 nullptr, /* tab_from_uni */
10227 &my_unicase_unicode900, /* caseinfo */
10228 nullptr, /* state_map */
10229 nullptr, /* ident_map */
10230 0, /* strxfrm_multiply */
10231 1, /* caseup_multiply */
10232 1, /* casedn_multiply */
10233 1, /* mbminlen */
10234 4, /* mbmaxlen */
10235 1, /* mbmaxlenlen */
10236 9, /* min_sort_char */
10237 0x10FFFF, /* max_sort_char */
10238 ' ', /* pad char */
10239 false, /* escape_with_backslash_is_dangerous */
10240 1, /* levels_for_compare */
10241 &my_charset_utf8mb4_handler,
10242 &my_collation_uca_900_handler,
10243 NO_PAD};
10244
/**
  utf8mb4 collation MY_UTF8MB4 "_hu_0900_ai_ci", number 274.
  Points at my_uca_v900 with the `hu_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD.
*/
10245 CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci = {
10246 274,
10247 0,
10248 0, /* number */
10249 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10250 MY_UTF8MB4, /* csname */
10251 MY_UTF8MB4 "_hu_0900_ai_ci", /* m_coll_name */
10252 "", /* comment */
10253 hu_cldr_30, /* tailoring */
10254 nullptr, /* coll_param */
10255 ctype_utf8, /* ctype */
10256 nullptr, /* to_lower */
10257 nullptr, /* to_upper */
10258 nullptr, /* sort_order */
10259 &my_uca_v900, /* uca */
10260 nullptr, /* tab_to_uni */
10261 nullptr, /* tab_from_uni */
10262 &my_unicase_unicode900, /* caseinfo */
10263 nullptr, /* state_map */
10264 nullptr, /* ident_map */
10265 0, /* strxfrm_multiply */
10266 1, /* caseup_multiply */
10267 1, /* casedn_multiply */
10268 1, /* mbminlen */
10269 4, /* mbmaxlen */
10270 1, /* mbmaxlenlen */
10271 9, /* min_sort_char */
10272 0x10FFFF, /* max_sort_char */
10273 ' ', /* pad char */
10274 false, /* escape_with_backslash_is_dangerous */
10275 1, /* levels_for_compare */
10276 &my_charset_utf8mb4_handler,
10277 &my_collation_uca_900_handler,
10278 NO_PAD};
10279
/**
  utf8mb4 collation MY_UTF8MB4 "_hr_0900_ai_ci", number 275.
  Points at my_uca_v900 with the `hr_cldr_30` tailoring string and
  my_collation_uca_900_handler; pad attribute is NO_PAD. Unlike the other
  active 0900 definitions nearby, coll_param is non-null (&hr_coll_param).
*/
10280 CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci = {
10281 275,
10282 0,
10283 0, /* number */
10284 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10285 MY_UTF8MB4, /* csname */
10286 MY_UTF8MB4 "_hr_0900_ai_ci", /* m_coll_name */
10287 "", /* comment */
10288 hr_cldr_30, /* tailoring */
10289 &hr_coll_param, /* coll_param */
10290 ctype_utf8, /* ctype */
10291 nullptr, /* to_lower */
10292 nullptr, /* to_upper */
10293 nullptr, /* sort_order */
10294 &my_uca_v900, /* uca */
10295 nullptr, /* tab_to_uni */
10296 nullptr, /* tab_from_uni */
10297 &my_unicase_unicode900, /* caseinfo */
10298 nullptr, /* state_map */
10299 nullptr, /* ident_map */
10300 0, /* strxfrm_multiply */
10301 1, /* caseup_multiply */
10302 1, /* casedn_multiply */
10303 1, /* mbminlen */
10304 4, /* mbmaxlen */
10305 1, /* mbmaxlenlen */
10306 9, /* min_sort_char */
10307 0x10FFFF, /* max_sort_char */
10308 ' ', /* pad char */
10309 false, /* escape_with_backslash_is_dangerous */
10310 1, /* levels_for_compare */
10311 &my_charset_utf8mb4_handler,
10312 &my_collation_uca_900_handler,
10313 NO_PAD};
10314
/*
  Compiled out: utf8mb4 collation MY_UTF8MB4 "_si_0900_ai_ci", number 276,
  with the `si_cldr_30` tailoring string.
  NOTE(review): the reason this definition is disabled is not visible here.
  It still uses NULL and 0 where the active definitions use nullptr and
  false; update those if it is ever re-enabled.
*/
10315 #if 0
10316 CHARSET_INFO my_charset_utf8mb4_si_0900_ai_ci=
10317 {
10318 276, 0, 0, /* number */
10319 MY_CS_UTF8MB4_UCA_FLAGS,/* state */
10320 MY_UTF8MB4, /* csname */
10321 MY_UTF8MB4 "_si_0900_ai_ci",/* m_coll_name */
10322 "", /* comment */
10323 si_cldr_30, /* tailoring */
10324 NULL, /* coll_param */
10325 ctype_utf8, /* ctype */
10326 NULL, /* to_lower */
10327 NULL, /* to_upper */
10328 NULL, /* sort_order */
10329 &my_uca_v900, /* uca */
10330 NULL, /* tab_to_uni */
10331 NULL, /* tab_from_uni */
10332 &my_unicase_unicode900,/* caseinfo */
10333 NULL, /* state_map */
10334 NULL, /* ident_map */
10335 0, /* strxfrm_multiply */
10336 1, /* caseup_multiply */
10337 1, /* casedn_multiply */
10338 1, /* mbminlen */
10339 4, /* mbmaxlen */
10340 1, /* mbmaxlenlen */
10341 9, /* min_sort_char */
10342 0x10FFFF, /* max_sort_char */
10343 ' ', /* pad char */
10344 0, /* escape_with_backslash_is_dangerous */
10345 1, /* levels_for_compare */
10346 &my_charset_utf8mb4_handler,
10347 &my_collation_uca_900_handler,
10348 NO_PAD
10349 };
10350 #endif
10351
/*
  Collation descriptor named MY_UTF8MB4 "_vi_0900_ai_ci", number 277.
  Uses the vi_cldr_30 tailoring, takes its UCA data from my_uca_v900,
  compares 1 level and is NO_PAD.

  coll_param points at vi_coll_param, exactly as
  my_charset_utf8mb4_vi_0900_as_cs does, so that both collations built
  from the vi_cldr_30 tailoring process it with the same parameters.
*/
10352 CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci = {
10353 277,
10354 0,
10355 0, /* number */
10356 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
10357 MY_UTF8MB4, /* csname */
10358 MY_UTF8MB4 "_vi_0900_ai_ci", /* m_coll_name */
10359 "", /* comment */
10360 vi_cldr_30, /* tailoring */
10361 &vi_coll_param, /* coll_param */
10362 ctype_utf8, /* ctype */
10363 nullptr, /* to_lower */
10364 nullptr, /* to_upper */
10365 nullptr, /* sort_order */
10366 &my_uca_v900, /* uca */
10367 nullptr, /* tab_to_uni */
10368 nullptr, /* tab_from_uni */
10369 &my_unicase_unicode900, /* caseinfo */
10370 nullptr, /* state_map */
10371 nullptr, /* ident_map */
10372 0, /* strxfrm_multiply */
10373 1, /* caseup_multiply */
10374 1, /* casedn_multiply */
10375 1, /* mbminlen */
10376 4, /* mbmaxlen */
10377 1, /* mbmaxlenlen */
10378 9, /* min_sort_char */
10379 0x10FFFF, /* max_sort_char */
10380 ' ', /* pad char */
10381 false, /* escape_with_backslash_is_dangerous */
10382 1, /* levels_for_compare */
10383 &my_charset_utf8mb4_handler,
10384 &my_collation_uca_900_handler,
10385 NO_PAD};
10386
/*
  Collation descriptor named MY_UTF8MB4 "_0900_as_cs", number 278.
  Has neither a tailoring nor a coll_param (both nullptr), adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10387 CHARSET_INFO my_charset_utf8mb4_0900_as_cs = {
10388 278,
10389 0,
10390 0, /* number */
10391 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10392 MY_UTF8MB4, /* csname */
10393 MY_UTF8MB4 "_0900_as_cs", /* m_coll_name */
10394 "", /* comment */
10395 nullptr, /* tailoring */
10396 nullptr, /* coll_param */
10397 ctype_utf8, /* ctype */
10398 nullptr, /* to_lower */
10399 nullptr, /* to_upper */
10400 nullptr, /* sort_order */
10401 &my_uca_v900, /* uca */
10402 nullptr, /* tab_to_uni */
10403 nullptr, /* tab_from_uni */
10404 &my_unicase_unicode900, /* caseinfo */
10405 nullptr, /* state_map */
10406 nullptr, /* ident_map */
10407 0, /* strxfrm_multiply */
10408 1, /* caseup_multiply */
10409 1, /* casedn_multiply */
10410 1, /* mbminlen */
10411 4, /* mbmaxlen */
10412 1, /* mbmaxlenlen */
10413 9, /* min_sort_char */
10414 0x10FFFF, /* max_sort_char */
10415 ' ', /* pad char */
10416 false, /* escape_with_backslash_is_dangerous */
10417 3, /* levels_for_compare */
10418 &my_charset_utf8mb4_handler,
10419 &my_collation_uca_900_handler,
10420 NO_PAD};
10421
/*
  Collation descriptor named MY_UTF8MB4 "_de_pb_0900_as_cs", number 279.
  Uses the de_pb_cldr_30 tailoring without a coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10422 CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs = {
10423 279,
10424 0,
10425 0, /* number */
10426 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10427 MY_UTF8MB4, /* csname */
10428 MY_UTF8MB4 "_de_pb_0900_as_cs", /* m_coll_name */
10429 "", /* comment */
10430 de_pb_cldr_30, /* tailoring */
10431 nullptr, /* coll_param */
10432 ctype_utf8, /* ctype */
10433 nullptr, /* to_lower */
10434 nullptr, /* to_upper */
10435 nullptr, /* sort_order */
10436 &my_uca_v900, /* uca */
10437 nullptr, /* tab_to_uni */
10438 nullptr, /* tab_from_uni */
10439 &my_unicase_unicode900, /* caseinfo */
10440 nullptr, /* state_map */
10441 nullptr, /* ident_map */
10442 0, /* strxfrm_multiply */
10443 1, /* caseup_multiply */
10444 1, /* casedn_multiply */
10445 1, /* mbminlen */
10446 4, /* mbmaxlen */
10447 1, /* mbmaxlenlen */
10448 9, /* min_sort_char */
10449 0x10FFFF, /* max_sort_char */
10450 ' ', /* pad char */
10451 false, /* escape_with_backslash_is_dangerous */
10452 3, /* levels_for_compare */
10453 &my_charset_utf8mb4_handler,
10454 &my_collation_uca_900_handler,
10455 NO_PAD};
10456
/*
  Collation descriptor named MY_UTF8MB4 "_is_0900_as_cs", number 280.
  Uses the is_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10457 CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs = {
10458 280,
10459 0,
10460 0, /* number */
10461 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10462 MY_UTF8MB4, /* csname */
10463 MY_UTF8MB4 "_is_0900_as_cs", /* m_coll_name */
10464 "", /* comment */
10465 is_cldr_30, /* tailoring */
10466 nullptr, /* coll_param */
10467 ctype_utf8, /* ctype */
10468 nullptr, /* to_lower */
10469 nullptr, /* to_upper */
10470 nullptr, /* sort_order */
10471 &my_uca_v900, /* uca */
10472 nullptr, /* tab_to_uni */
10473 nullptr, /* tab_from_uni */
10474 &my_unicase_unicode900, /* caseinfo */
10475 nullptr, /* state_map */
10476 nullptr, /* ident_map */
10477 0, /* strxfrm_multiply */
10478 1, /* caseup_multiply */
10479 1, /* casedn_multiply */
10480 1, /* mbminlen */
10481 4, /* mbmaxlen */
10482 1, /* mbmaxlenlen */
10483 9, /* min_sort_char */
10484 0x10FFFF, /* max_sort_char */
10485 ' ', /* pad char */
10486 false, /* escape_with_backslash_is_dangerous */
10487 3, /* levels_for_compare */
10488 &my_charset_utf8mb4_handler,
10489 &my_collation_uca_900_handler,
10490 NO_PAD};
10491
/*
  Collation descriptor named MY_UTF8MB4 "_lv_0900_as_cs", number 281.
  Uses the lv_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10492 CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs = {
10493 281,
10494 0,
10495 0, /* number */
10496 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10497 MY_UTF8MB4, /* csname */
10498 MY_UTF8MB4 "_lv_0900_as_cs", /* m_coll_name */
10499 "", /* comment */
10500 lv_cldr_30, /* tailoring */
10501 nullptr, /* coll_param */
10502 ctype_utf8, /* ctype */
10503 nullptr, /* to_lower */
10504 nullptr, /* to_upper */
10505 nullptr, /* sort_order */
10506 &my_uca_v900, /* uca */
10507 nullptr, /* tab_to_uni */
10508 nullptr, /* tab_from_uni */
10509 &my_unicase_unicode900, /* caseinfo */
10510 nullptr, /* state_map */
10511 nullptr, /* ident_map */
10512 0, /* strxfrm_multiply */
10513 1, /* caseup_multiply */
10514 1, /* casedn_multiply */
10515 1, /* mbminlen */
10516 4, /* mbmaxlen */
10517 1, /* mbmaxlenlen */
10518 9, /* min_sort_char */
10519 0x10FFFF, /* max_sort_char */
10520 ' ', /* pad char */
10521 false, /* escape_with_backslash_is_dangerous */
10522 3, /* levels_for_compare */
10523 &my_charset_utf8mb4_handler,
10524 &my_collation_uca_900_handler,
10525 NO_PAD};
10526
/*
  Collation descriptor named MY_UTF8MB4 "_ro_0900_as_cs", number 282.
  Uses the ro_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10527 CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs = {
10528 282,
10529 0,
10530 0, /* number */
10531 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10532 MY_UTF8MB4, /* csname */
10533 MY_UTF8MB4 "_ro_0900_as_cs", /* m_coll_name */
10534 "", /* comment */
10535 ro_cldr_30, /* tailoring */
10536 nullptr, /* coll_param */
10537 ctype_utf8, /* ctype */
10538 nullptr, /* to_lower */
10539 nullptr, /* to_upper */
10540 nullptr, /* sort_order */
10541 &my_uca_v900, /* uca */
10542 nullptr, /* tab_to_uni */
10543 nullptr, /* tab_from_uni */
10544 &my_unicase_unicode900, /* caseinfo */
10545 nullptr, /* state_map */
10546 nullptr, /* ident_map */
10547 0, /* strxfrm_multiply */
10548 1, /* caseup_multiply */
10549 1, /* casedn_multiply */
10550 1, /* mbminlen */
10551 4, /* mbmaxlen */
10552 1, /* mbmaxlenlen */
10553 9, /* min_sort_char */
10554 0x10FFFF, /* max_sort_char */
10555 ' ', /* pad char */
10556 false, /* escape_with_backslash_is_dangerous */
10557 3, /* levels_for_compare */
10558 &my_charset_utf8mb4_handler,
10559 &my_collation_uca_900_handler,
10560 NO_PAD};
10561
/*
  Collation descriptor named MY_UTF8MB4 "_sl_0900_as_cs", number 283.
  Uses the sl_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10562 CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs = {
10563 283,
10564 0,
10565 0, /* number */
10566 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10567 MY_UTF8MB4, /* csname */
10568 MY_UTF8MB4 "_sl_0900_as_cs", /* m_coll_name */
10569 "", /* comment */
10570 sl_cldr_30, /* tailoring */
10571 nullptr, /* coll_param */
10572 ctype_utf8, /* ctype */
10573 nullptr, /* to_lower */
10574 nullptr, /* to_upper */
10575 nullptr, /* sort_order */
10576 &my_uca_v900, /* uca */
10577 nullptr, /* tab_to_uni */
10578 nullptr, /* tab_from_uni */
10579 &my_unicase_unicode900, /* caseinfo */
10580 nullptr, /* state_map */
10581 nullptr, /* ident_map */
10582 0, /* strxfrm_multiply */
10583 1, /* caseup_multiply */
10584 1, /* casedn_multiply */
10585 1, /* mbminlen */
10586 4, /* mbmaxlen */
10587 1, /* mbmaxlenlen */
10588 9, /* min_sort_char */
10589 0x10FFFF, /* max_sort_char */
10590 ' ', /* pad char */
10591 false, /* escape_with_backslash_is_dangerous */
10592 3, /* levels_for_compare */
10593 &my_charset_utf8mb4_handler,
10594 &my_collation_uca_900_handler,
10595 NO_PAD};
10596
/*
  Collation descriptor named MY_UTF8MB4 "_pl_0900_as_cs", number 284.
  Uses the pl_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10597 CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs = {
10598 284,
10599 0,
10600 0, /* number */
10601 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10602 MY_UTF8MB4, /* csname */
10603 MY_UTF8MB4 "_pl_0900_as_cs", /* m_coll_name */
10604 "", /* comment */
10605 pl_cldr_30, /* tailoring */
10606 nullptr, /* coll_param */
10607 ctype_utf8, /* ctype */
10608 nullptr, /* to_lower */
10609 nullptr, /* to_upper */
10610 nullptr, /* sort_order */
10611 &my_uca_v900, /* uca */
10612 nullptr, /* tab_to_uni */
10613 nullptr, /* tab_from_uni */
10614 &my_unicase_unicode900, /* caseinfo */
10615 nullptr, /* state_map */
10616 nullptr, /* ident_map */
10617 0, /* strxfrm_multiply */
10618 1, /* caseup_multiply */
10619 1, /* casedn_multiply */
10620 1, /* mbminlen */
10621 4, /* mbmaxlen */
10622 1, /* mbmaxlenlen */
10623 9, /* min_sort_char */
10624 0x10FFFF, /* max_sort_char */
10625 ' ', /* pad char */
10626 false, /* escape_with_backslash_is_dangerous */
10627 3, /* levels_for_compare */
10628 &my_charset_utf8mb4_handler,
10629 &my_collation_uca_900_handler,
10630 NO_PAD};
10631
/*
  Collation descriptor named MY_UTF8MB4 "_et_0900_as_cs", number 285.
  Uses the et_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10632 CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs = {
10633 285,
10634 0,
10635 0, /* number */
10636 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10637 MY_UTF8MB4, /* csname */
10638 MY_UTF8MB4 "_et_0900_as_cs", /* m_coll_name */
10639 "", /* comment */
10640 et_cldr_30, /* tailoring */
10641 nullptr, /* coll_param */
10642 ctype_utf8, /* ctype */
10643 nullptr, /* to_lower */
10644 nullptr, /* to_upper */
10645 nullptr, /* sort_order */
10646 &my_uca_v900, /* uca */
10647 nullptr, /* tab_to_uni */
10648 nullptr, /* tab_from_uni */
10649 &my_unicase_unicode900, /* caseinfo */
10650 nullptr, /* state_map */
10651 nullptr, /* ident_map */
10652 0, /* strxfrm_multiply */
10653 1, /* caseup_multiply */
10654 1, /* casedn_multiply */
10655 1, /* mbminlen */
10656 4, /* mbmaxlen */
10657 1, /* mbmaxlenlen */
10658 9, /* min_sort_char */
10659 0x10FFFF, /* max_sort_char */
10660 ' ', /* pad char */
10661 false, /* escape_with_backslash_is_dangerous */
10662 3, /* levels_for_compare */
10663 &my_charset_utf8mb4_handler,
10664 &my_collation_uca_900_handler,
10665 NO_PAD};
10666
/*
  Collation descriptor named MY_UTF8MB4 "_es_0900_as_cs", number 286.
  Uses the tailoring named `spanish` without a coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10667 CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs = {
10668 286,
10669 0,
10670 0, /* number */
10671 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10672 MY_UTF8MB4, /* csname */
10673 MY_UTF8MB4 "_es_0900_as_cs", /* m_coll_name */
10674 "", /* comment */
10675 spanish, /* tailoring */
10676 nullptr, /* coll_param */
10677 ctype_utf8, /* ctype */
10678 nullptr, /* to_lower */
10679 nullptr, /* to_upper */
10680 nullptr, /* sort_order */
10681 &my_uca_v900, /* uca */
10682 nullptr, /* tab_to_uni */
10683 nullptr, /* tab_from_uni */
10684 &my_unicase_unicode900, /* caseinfo */
10685 nullptr, /* state_map */
10686 nullptr, /* ident_map */
10687 0, /* strxfrm_multiply */
10688 1, /* caseup_multiply */
10689 1, /* casedn_multiply */
10690 1, /* mbminlen */
10691 4, /* mbmaxlen */
10692 1, /* mbmaxlenlen */
10693 9, /* min_sort_char */
10694 0x10FFFF, /* max_sort_char */
10695 ' ', /* pad char */
10696 false, /* escape_with_backslash_is_dangerous */
10697 3, /* levels_for_compare */
10698 &my_charset_utf8mb4_handler,
10699 &my_collation_uca_900_handler,
10700 NO_PAD};
10701
/*
  Collation descriptor named MY_UTF8MB4 "_sv_0900_as_cs", number 287.
  Uses the sv_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10702 CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs = {
10703 287,
10704 0,
10705 0, /* number */
10706 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10707 MY_UTF8MB4, /* csname */
10708 MY_UTF8MB4 "_sv_0900_as_cs", /* m_coll_name */
10709 "", /* comment */
10710 sv_cldr_30, /* tailoring */
10711 nullptr, /* coll_param */
10712 ctype_utf8, /* ctype */
10713 nullptr, /* to_lower */
10714 nullptr, /* to_upper */
10715 nullptr, /* sort_order */
10716 &my_uca_v900, /* uca */
10717 nullptr, /* tab_to_uni */
10718 nullptr, /* tab_from_uni */
10719 &my_unicase_unicode900, /* caseinfo */
10720 nullptr, /* state_map */
10721 nullptr, /* ident_map */
10722 0, /* strxfrm_multiply */
10723 1, /* caseup_multiply */
10724 1, /* casedn_multiply */
10725 1, /* mbminlen */
10726 4, /* mbmaxlen */
10727 1, /* mbmaxlenlen */
10728 9, /* min_sort_char */
10729 0x10FFFF, /* max_sort_char */
10730 ' ', /* pad char */
10731 false, /* escape_with_backslash_is_dangerous */
10732 3, /* levels_for_compare */
10733 &my_charset_utf8mb4_handler,
10734 &my_collation_uca_900_handler,
10735 NO_PAD};
10736
/*
  Collation descriptor named MY_UTF8MB4 "_tr_0900_as_cs", number 288.
  Uses the tr_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10737 CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs = {
10738 288,
10739 0,
10740 0, /* number */
10741 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10742 MY_UTF8MB4, /* csname */
10743 MY_UTF8MB4 "_tr_0900_as_cs", /* m_coll_name */
10744 "", /* comment */
10745 tr_cldr_30, /* tailoring */
10746 nullptr, /* coll_param */
10747 ctype_utf8, /* ctype */
10748 nullptr, /* to_lower */
10749 nullptr, /* to_upper */
10750 nullptr, /* sort_order */
10751 &my_uca_v900, /* uca */
10752 nullptr, /* tab_to_uni */
10753 nullptr, /* tab_from_uni */
10754 &my_unicase_unicode900, /* caseinfo */
10755 nullptr, /* state_map */
10756 nullptr, /* ident_map */
10757 0, /* strxfrm_multiply */
10758 1, /* caseup_multiply */
10759 1, /* casedn_multiply */
10760 1, /* mbminlen */
10761 4, /* mbmaxlen */
10762 1, /* mbmaxlenlen */
10763 9, /* min_sort_char */
10764 0x10FFFF, /* max_sort_char */
10765 ' ', /* pad char */
10766 false, /* escape_with_backslash_is_dangerous */
10767 3, /* levels_for_compare */
10768 &my_charset_utf8mb4_handler,
10769 &my_collation_uca_900_handler,
10770 NO_PAD};
10771
/*
  Collation descriptor named MY_UTF8MB4 "_cs_0900_as_cs", number 289.
  Uses the cs_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10772 CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs = {
10773 289,
10774 0,
10775 0, /* number */
10776 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10777 MY_UTF8MB4, /* csname */
10778 MY_UTF8MB4 "_cs_0900_as_cs", /* m_coll_name */
10779 "", /* comment */
10780 cs_cldr_30, /* tailoring */
10781 nullptr, /* coll_param */
10782 ctype_utf8, /* ctype */
10783 nullptr, /* to_lower */
10784 nullptr, /* to_upper */
10785 nullptr, /* sort_order */
10786 &my_uca_v900, /* uca */
10787 nullptr, /* tab_to_uni */
10788 nullptr, /* tab_from_uni */
10789 &my_unicase_unicode900, /* caseinfo */
10790 nullptr, /* state_map */
10791 nullptr, /* ident_map */
10792 0, /* strxfrm_multiply */
10793 1, /* caseup_multiply */
10794 1, /* casedn_multiply */
10795 1, /* mbminlen */
10796 4, /* mbmaxlen */
10797 1, /* mbmaxlenlen */
10798 9, /* min_sort_char */
10799 0x10FFFF, /* max_sort_char */
10800 ' ', /* pad char */
10801 false, /* escape_with_backslash_is_dangerous */
10802 3, /* levels_for_compare */
10803 &my_charset_utf8mb4_handler,
10804 &my_collation_uca_900_handler,
10805 NO_PAD};
10806
/*
  Collation descriptor named MY_UTF8MB4 "_da_0900_as_cs", number 290.
  Uses the da_cldr_30 tailoring together with da_coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10807 CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs = {
10808 290,
10809 0,
10810 0, /* number */
10811 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10812 MY_UTF8MB4, /* csname */
10813 MY_UTF8MB4 "_da_0900_as_cs", /* m_coll_name */
10814 "", /* comment */
10815 da_cldr_30, /* tailoring */
10816 &da_coll_param, /* coll_param */
10817 ctype_utf8, /* ctype */
10818 nullptr, /* to_lower */
10819 nullptr, /* to_upper */
10820 nullptr, /* sort_order */
10821 &my_uca_v900, /* uca */
10822 nullptr, /* tab_to_uni */
10823 nullptr, /* tab_from_uni */
10824 &my_unicase_unicode900, /* caseinfo */
10825 nullptr, /* state_map */
10826 nullptr, /* ident_map */
10827 0, /* strxfrm_multiply */
10828 1, /* caseup_multiply */
10829 1, /* casedn_multiply */
10830 1, /* mbminlen */
10831 4, /* mbmaxlen */
10832 1, /* mbmaxlenlen */
10833 9, /* min_sort_char */
10834 0x10FFFF, /* max_sort_char */
10835 ' ', /* pad char */
10836 false, /* escape_with_backslash_is_dangerous */
10837 3, /* levels_for_compare */
10838 &my_charset_utf8mb4_handler,
10839 &my_collation_uca_900_handler,
10840 NO_PAD};
10841
/*
  Collation descriptor named MY_UTF8MB4 "_lt_0900_as_cs", number 291.
  Uses the lt_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10842 CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs = {
10843 291,
10844 0,
10845 0, /* number */
10846 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10847 MY_UTF8MB4, /* csname */
10848 MY_UTF8MB4 "_lt_0900_as_cs", /* m_coll_name */
10849 "", /* comment */
10850 lt_cldr_30, /* tailoring */
10851 nullptr, /* coll_param */
10852 ctype_utf8, /* ctype */
10853 nullptr, /* to_lower */
10854 nullptr, /* to_upper */
10855 nullptr, /* sort_order */
10856 &my_uca_v900, /* uca */
10857 nullptr, /* tab_to_uni */
10858 nullptr, /* tab_from_uni */
10859 &my_unicase_unicode900, /* caseinfo */
10860 nullptr, /* state_map */
10861 nullptr, /* ident_map */
10862 0, /* strxfrm_multiply */
10863 1, /* caseup_multiply */
10864 1, /* casedn_multiply */
10865 1, /* mbminlen */
10866 4, /* mbmaxlen */
10867 1, /* mbmaxlenlen */
10868 9, /* min_sort_char */
10869 0x10FFFF, /* max_sort_char */
10870 ' ', /* pad char */
10871 false, /* escape_with_backslash_is_dangerous */
10872 3, /* levels_for_compare */
10873 &my_charset_utf8mb4_handler,
10874 &my_collation_uca_900_handler,
10875 NO_PAD};
10876
/*
  Collation descriptor named MY_UTF8MB4 "_sk_0900_as_cs", number 292.
  Uses the sk_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
10877 CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs = {
10878 292,
10879 0,
10880 0, /* number */
10881 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10882 MY_UTF8MB4, /* csname */
10883 MY_UTF8MB4 "_sk_0900_as_cs", /* m_coll_name */
10884 "", /* comment */
10885 sk_cldr_30, /* tailoring */
10886 nullptr, /* coll_param */
10887 ctype_utf8, /* ctype */
10888 nullptr, /* to_lower */
10889 nullptr, /* to_upper */
10890 nullptr, /* sort_order */
10891 &my_uca_v900, /* uca */
10892 nullptr, /* tab_to_uni */
10893 nullptr, /* tab_from_uni */
10894 &my_unicase_unicode900, /* caseinfo */
10895 nullptr, /* state_map */
10896 nullptr, /* ident_map */
10897 0, /* strxfrm_multiply */
10898 1, /* caseup_multiply */
10899 1, /* casedn_multiply */
10900 1, /* mbminlen */
10901 4, /* mbmaxlen */
10902 1, /* mbmaxlenlen */
10903 9, /* min_sort_char */
10904 0x10FFFF, /* max_sort_char */
10905 ' ', /* pad char */
10906 false, /* escape_with_backslash_is_dangerous */
10907 3, /* levels_for_compare */
10908 &my_charset_utf8mb4_handler,
10909 &my_collation_uca_900_handler,
10910 NO_PAD};
10911
/*
  Collation descriptor named MY_UTF8MB4 "_es_trad_0900_as_cs", number 293.
  Uses the es_trad_cldr_30 tailoring without a coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10912 CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs = {
10913 293,
10914 0,
10915 0, /* number */
10916 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10917 MY_UTF8MB4, /* csname */
10918 MY_UTF8MB4 "_es_trad_0900_as_cs", /* m_coll_name */
10919 "", /* comment */
10920 es_trad_cldr_30, /* tailoring */
10921 nullptr, /* coll_param */
10922 ctype_utf8, /* ctype */
10923 nullptr, /* to_lower */
10924 nullptr, /* to_upper */
10925 nullptr, /* sort_order */
10926 &my_uca_v900, /* uca */
10927 nullptr, /* tab_to_uni */
10928 nullptr, /* tab_from_uni */
10929 &my_unicase_unicode900, /* caseinfo */
10930 nullptr, /* state_map */
10931 nullptr, /* ident_map */
10932 0, /* strxfrm_multiply */
10933 1, /* caseup_multiply */
10934 1, /* casedn_multiply */
10935 1, /* mbminlen */
10936 4, /* mbmaxlen */
10937 1, /* mbmaxlenlen */
10938 9, /* min_sort_char */
10939 0x10FFFF, /* max_sort_char */
10940 ' ', /* pad char */
10941 false, /* escape_with_backslash_is_dangerous */
10942 3, /* levels_for_compare */
10943 &my_charset_utf8mb4_handler,
10944 &my_collation_uca_900_handler,
10945 NO_PAD};
10946
/*
  Collation descriptor named MY_UTF8MB4 "_la_0900_as_cs", number 294.
  Uses the tailoring named `roman` without a coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
10947 CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs = {
10948 294,
10949 0,
10950 0, /* number */
10951 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
10952 MY_UTF8MB4, /* csname */
10953 MY_UTF8MB4 "_la_0900_as_cs", /* m_coll_name */
10954 "", /* comment */
10955 roman, /* tailoring */
10956 nullptr, /* coll_param */
10957 ctype_utf8, /* ctype */
10958 nullptr, /* to_lower */
10959 nullptr, /* to_upper */
10960 nullptr, /* sort_order */
10961 &my_uca_v900, /* uca */
10962 nullptr, /* tab_to_uni */
10963 nullptr, /* tab_from_uni */
10964 &my_unicase_unicode900, /* caseinfo */
10965 nullptr, /* state_map */
10966 nullptr, /* ident_map */
10967 0, /* strxfrm_multiply */
10968 1, /* caseup_multiply */
10969 1, /* casedn_multiply */
10970 1, /* mbminlen */
10971 4, /* mbmaxlen */
10972 1, /* mbmaxlenlen */
10973 9, /* min_sort_char */
10974 0x10FFFF, /* max_sort_char */
10975 ' ', /* pad char */
10976 false, /* escape_with_backslash_is_dangerous */
10977 3, /* levels_for_compare */
10978 &my_charset_utf8mb4_handler,
10979 &my_collation_uca_900_handler,
10980 NO_PAD};
10981
/*
  Disabled definition: the #if 0 / #endif pair below removes the
  MY_UTF8MB4 "_fa_0900_as_cs" descriptor (number 295, fa_cldr_30
  tailoring with fa_coll_param, 3 compare levels) from compilation.
  NOTE(review): presumably kept as a placeholder; confirm before removing.
*/
10982 #if 0
10983 CHARSET_INFO my_charset_utf8mb4_fa_0900_as_cs=
10984 {
10985 295, 0, 0, /* number */
10986 MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */
10987 MY_UTF8MB4, /* csname */
10988 MY_UTF8MB4 "_fa_0900_as_cs",/* m_coll_name */
10989 "", /* comment */
10990 fa_cldr_30, /* tailoring */
10991 &fa_coll_param, /* coll_param */
10992 ctype_utf8, /* ctype */
10993 NULL, /* to_lower */
10994 NULL, /* to_upper */
10995 NULL, /* sort_order */
10996 &my_uca_v900, /* uca */
10997 NULL, /* tab_to_uni */
10998 NULL, /* tab_from_uni */
10999 &my_unicase_unicode900,/* caseinfo */
11000 NULL, /* state_map */
11001 NULL, /* ident_map */
11002 0, /* strxfrm_multiply */
11003 1, /* caseup_multiply */
11004 1, /* casedn_multiply */
11005 1, /* mbminlen */
11006 4, /* mbmaxlen */
11007 1, /* mbmaxlenlen */
11008 9, /* min_sort_char */
11009 0x10FFFF, /* max_sort_char */
11010 ' ', /* pad char */
11011 0, /* escape_with_backslash_is_dangerous */
11012 3, /* levels_for_compare */
11013 &my_charset_utf8mb4_handler,
11014 &my_collation_uca_900_handler,
11015 NO_PAD
11016 };
11017 #endif
11018
/*
  Collation descriptor named MY_UTF8MB4 "_eo_0900_as_cs", number 296.
  Uses the tailoring named `esperanto` without a coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
11019 CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs = {
11020 296,
11021 0,
11022 0, /* number */
11023 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11024 MY_UTF8MB4, /* csname */
11025 MY_UTF8MB4 "_eo_0900_as_cs", /* m_coll_name */
11026 "", /* comment */
11027 esperanto, /* tailoring */
11028 nullptr, /* coll_param */
11029 ctype_utf8, /* ctype */
11030 nullptr, /* to_lower */
11031 nullptr, /* to_upper */
11032 nullptr, /* sort_order */
11033 &my_uca_v900, /* uca */
11034 nullptr, /* tab_to_uni */
11035 nullptr, /* tab_from_uni */
11036 &my_unicase_unicode900, /* caseinfo */
11037 nullptr, /* state_map */
11038 nullptr, /* ident_map */
11039 0, /* strxfrm_multiply */
11040 1, /* caseup_multiply */
11041 1, /* casedn_multiply */
11042 1, /* mbminlen */
11043 4, /* mbmaxlen */
11044 1, /* mbmaxlenlen */
11045 9, /* min_sort_char */
11046 0x10FFFF, /* max_sort_char */
11047 ' ', /* pad char */
11048 false, /* escape_with_backslash_is_dangerous */
11049 3, /* levels_for_compare */
11050 &my_charset_utf8mb4_handler,
11051 &my_collation_uca_900_handler,
11052 NO_PAD};
11053
/*
  Collation descriptor named MY_UTF8MB4 "_hu_0900_as_cs", number 297.
  Uses the hu_cldr_30 tailoring without a coll_param, adds MY_CS_CSSORT
  to the state flags, takes its UCA data from my_uca_v900, compares
  3 levels and is NO_PAD.
*/
11054 CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs = {
11055 297,
11056 0,
11057 0, /* number */
11058 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11059 MY_UTF8MB4, /* csname */
11060 MY_UTF8MB4 "_hu_0900_as_cs", /* m_coll_name */
11061 "", /* comment */
11062 hu_cldr_30, /* tailoring */
11063 nullptr, /* coll_param */
11064 ctype_utf8, /* ctype */
11065 nullptr, /* to_lower */
11066 nullptr, /* to_upper */
11067 nullptr, /* sort_order */
11068 &my_uca_v900, /* uca */
11069 nullptr, /* tab_to_uni */
11070 nullptr, /* tab_from_uni */
11071 &my_unicase_unicode900, /* caseinfo */
11072 nullptr, /* state_map */
11073 nullptr, /* ident_map */
11074 0, /* strxfrm_multiply */
11075 1, /* caseup_multiply */
11076 1, /* casedn_multiply */
11077 1, /* mbminlen */
11078 4, /* mbmaxlen */
11079 1, /* mbmaxlenlen */
11080 9, /* min_sort_char */
11081 0x10FFFF, /* max_sort_char */
11082 ' ', /* pad char */
11083 false, /* escape_with_backslash_is_dangerous */
11084 3, /* levels_for_compare */
11085 &my_charset_utf8mb4_handler,
11086 &my_collation_uca_900_handler,
11087 NO_PAD};
11088
/*
  Collation descriptor named MY_UTF8MB4 "_hr_0900_as_cs", number 298.
  Uses the hr_cldr_30 tailoring together with hr_coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
11089 CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs = {
11090 298,
11091 0,
11092 0, /* number */
11093 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11094 MY_UTF8MB4, /* csname */
11095 MY_UTF8MB4 "_hr_0900_as_cs", /* m_coll_name */
11096 "", /* comment */
11097 hr_cldr_30, /* tailoring */
11098 &hr_coll_param, /* coll_param */
11099 ctype_utf8, /* ctype */
11100 nullptr, /* to_lower */
11101 nullptr, /* to_upper */
11102 nullptr, /* sort_order */
11103 &my_uca_v900, /* uca */
11104 nullptr, /* tab_to_uni */
11105 nullptr, /* tab_from_uni */
11106 &my_unicase_unicode900, /* caseinfo */
11107 nullptr, /* state_map */
11108 nullptr, /* ident_map */
11109 0, /* strxfrm_multiply */
11110 1, /* caseup_multiply */
11111 1, /* casedn_multiply */
11112 1, /* mbminlen */
11113 4, /* mbmaxlen */
11114 1, /* mbmaxlenlen */
11115 9, /* min_sort_char */
11116 0x10FFFF, /* max_sort_char */
11117 ' ', /* pad char */
11118 false, /* escape_with_backslash_is_dangerous */
11119 3, /* levels_for_compare */
11120 &my_charset_utf8mb4_handler,
11121 &my_collation_uca_900_handler,
11122 NO_PAD};
11123
/*
  Disabled definition: the #if 0 / #endif pair below removes the
  MY_UTF8MB4 "_si_0900_as_cs" descriptor (number 299, si_cldr_30
  tailoring, no coll_param, 3 compare levels) from compilation.
  NOTE(review): presumably kept as a placeholder; confirm before removing.
*/
11124 #if 0
11125 CHARSET_INFO my_charset_utf8mb4_si_0900_as_cs=
11126 {
11127 299, 0, 0, /* number */
11128 MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */
11129 MY_UTF8MB4, /* csname */
11130 MY_UTF8MB4 "_si_0900_as_cs",/* m_coll_name */
11131 "", /* comment */
11132 si_cldr_30, /* tailoring */
11133 NULL, /* coll_param */
11134 ctype_utf8, /* ctype */
11135 NULL, /* to_lower */
11136 NULL, /* to_upper */
11137 NULL, /* sort_order */
11138 &my_uca_v900, /* uca */
11139 NULL, /* tab_to_uni */
11140 NULL, /* tab_from_uni */
11141 &my_unicase_unicode900,/* caseinfo */
11142 NULL, /* state_map */
11143 NULL, /* ident_map */
11144 0, /* strxfrm_multiply */
11145 1, /* caseup_multiply */
11146 1, /* casedn_multiply */
11147 1, /* mbminlen */
11148 4, /* mbmaxlen */
11149 1, /* mbmaxlenlen */
11150 9, /* min_sort_char */
11151 0x10FFFF, /* max_sort_char */
11152 ' ', /* pad char */
11153 0, /* escape_with_backslash_is_dangerous */
11154 3, /* levels_for_compare */
11155 &my_charset_utf8mb4_handler,
11156 &my_collation_uca_900_handler,
11157 NO_PAD
11158 };
11159 #endif
11160
/*
  Collation descriptor named MY_UTF8MB4 "_vi_0900_as_cs", number 300.
  Uses the vi_cldr_30 tailoring together with vi_coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD.
*/
11161 CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs = {
11162 300,
11163 0,
11164 0, /* number */
11165 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11166 MY_UTF8MB4, /* csname */
11167 MY_UTF8MB4 "_vi_0900_as_cs", /* m_coll_name */
11168 "", /* comment */
11169 vi_cldr_30, /* tailoring */
11170 &vi_coll_param, /* coll_param */
11171 ctype_utf8, /* ctype */
11172 nullptr, /* to_lower */
11173 nullptr, /* to_upper */
11174 nullptr, /* sort_order */
11175 &my_uca_v900, /* uca */
11176 nullptr, /* tab_to_uni */
11177 nullptr, /* tab_from_uni */
11178 &my_unicase_unicode900, /* caseinfo */
11179 nullptr, /* state_map */
11180 nullptr, /* ident_map */
11181 0, /* strxfrm_multiply */
11182 1, /* caseup_multiply */
11183 1, /* casedn_multiply */
11184 1, /* mbminlen */
11185 4, /* mbmaxlen */
11186 1, /* mbmaxlenlen */
11187 9, /* min_sort_char */
11188 0x10FFFF, /* max_sort_char */
11189 ' ', /* pad char */
11190 false, /* escape_with_backslash_is_dangerous */
11191 3, /* levels_for_compare */
11192 &my_charset_utf8mb4_handler,
11193 &my_collation_uca_900_handler,
11194 NO_PAD};
11195
/*
  Collation descriptor named MY_UTF8MB4 "_ja_0900_as_cs", number 303.
  Uses the ja_cldr_30 tailoring together with ja_coll_param, adds
  MY_CS_CSSORT to the state flags, takes its UCA data from my_uca_v900,
  compares 3 levels and is NO_PAD. min_sort_char is 32 in this entry.
*/
11196 CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs = {
11197 303,
11198 0,
11199 0, /* number */
11200 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11201 MY_UTF8MB4, /* csname */
11202 MY_UTF8MB4 "_ja_0900_as_cs", /* m_coll_name */
11203 "", /* comment */
11204 ja_cldr_30, /* tailoring */
11205 &ja_coll_param, /* coll_param */
11206 ctype_utf8, /* ctype */
11207 nullptr, /* to_lower */
11208 nullptr, /* to_upper */
11209 nullptr, /* sort_order */
11210 &my_uca_v900, /* uca */
11211 nullptr, /* tab_to_uni */
11212 nullptr, /* tab_from_uni */
11213 &my_unicase_unicode900, /* caseinfo */
11214 nullptr, /* state_map */
11215 nullptr, /* ident_map */
11216 0, /* strxfrm_multiply */
11217 1, /* caseup_multiply */
11218 1, /* casedn_multiply */
11219 1, /* mbminlen */
11220 4, /* mbmaxlen */
11221 1, /* mbmaxlenlen */
11222 32, /* min_sort_char */
11223 0x10FFFF, /* max_sort_char */
11224 ' ', /* pad char */
11225 false, /* escape_with_backslash_is_dangerous */
11226 3, /* levels_for_compare */
11227 &my_charset_utf8mb4_handler,
11228 &my_collation_uca_900_handler,
11229 NO_PAD};
11230
/*
  Collation descriptor named MY_UTF8MB4 "_ja_0900_as_cs_ks", number 304.
  Shares the ja_cldr_30 tailoring and ja_coll_param with
  my_charset_utf8mb4_ja_0900_as_cs but compares 4 levels instead of 3
  and sets strxfrm_multiply to 24. Adds MY_CS_CSSORT to the state flags,
  takes its UCA data from my_uca_v900 and is NO_PAD.
  NOTE(review): the reason for the value 24 is not visible here; confirm
  against the code that consumes strxfrm_multiply.
*/
11231 CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks = {
11232 304,
11233 0,
11234 0, /* number */
11235 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11236 MY_UTF8MB4, /* csname */
11237 MY_UTF8MB4 "_ja_0900_as_cs_ks", /* m_coll_name */
11238 "", /* comment */
11239 ja_cldr_30, /* tailoring */
11240 &ja_coll_param, /* coll_param */
11241 ctype_utf8, /* ctype */
11242 nullptr, /* to_lower */
11243 nullptr, /* to_upper */
11244 nullptr, /* sort_order */
11245 &my_uca_v900, /* uca */
11246 nullptr, /* tab_to_uni */
11247 nullptr, /* tab_from_uni */
11248 &my_unicase_unicode900, /* caseinfo */
11249 nullptr, /* state_map */
11250 nullptr, /* ident_map */
11251 24, /* strxfrm_multiply */
11252 1, /* caseup_multiply */
11253 1, /* casedn_multiply */
11254 1, /* mbminlen */
11255 4, /* mbmaxlen */
11256 1, /* mbmaxlenlen */
11257 32, /* min_sort_char */
11258 0x10FFFF, /* max_sort_char */
11259 ' ', /* pad char */
11260 false, /* escape_with_backslash_is_dangerous */
11261 4, /* levels_for_compare */
11262 &my_charset_utf8mb4_handler,
11263 &my_collation_uca_900_handler,
11264 NO_PAD};
11265
/*
  Collation descriptor named MY_UTF8MB4 "_0900_as_ci", number 305.
  Has neither a tailoring nor a coll_param (both nullptr), uses the
  plain MY_CS_UTF8MB4_UCA_FLAGS state (no MY_CS_CSSORT), takes its UCA
  data from my_uca_v900, compares 2 levels and is NO_PAD.
  min_sort_char is 32 in this entry.
*/
11266 CHARSET_INFO my_charset_utf8mb4_0900_as_ci = {
11267 305,
11268 0,
11269 0, /* number */
11270 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11271 MY_UTF8MB4, /* csname */
11272 MY_UTF8MB4 "_0900_as_ci", /* m_coll_name */
11273 "", /* comment */
11274 nullptr, /* tailoring */
11275 nullptr, /* coll_param */
11276 ctype_utf8, /* ctype */
11277 nullptr, /* to_lower */
11278 nullptr, /* to_upper */
11279 nullptr, /* sort_order */
11280 &my_uca_v900, /* uca */
11281 nullptr, /* tab_to_uni */
11282 nullptr, /* tab_from_uni */
11283 &my_unicase_unicode900, /* caseinfo */
11284 nullptr, /* state_map */
11285 nullptr, /* ident_map */
11286 0, /* strxfrm_multiply */
11287 1, /* caseup_multiply */
11288 1, /* casedn_multiply */
11289 1, /* mbminlen */
11290 4, /* mbmaxlen */
11291 1, /* mbmaxlenlen */
11292 32, /* min_sort_char */
11293 0x10FFFF, /* max_sort_char */
11294 ' ', /* pad char */
11295 false, /* escape_with_backslash_is_dangerous */
11296 2, /* levels_for_compare */
11297 &my_charset_utf8mb4_handler,
11298 &my_collation_uca_900_handler,
11299 NO_PAD};
11300
/*
  Collation descriptor named MY_UTF8MB4 "_ru_0900_ai_ci", number 306.
  The tailoring is the empty string (not nullptr) and the entry is
  parameterised only through ru_coll_param. Takes its UCA data from
  my_uca_v900, compares 1 level and is NO_PAD.
  min_sort_char is 32 in this entry.
*/
11301 CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci = {
11302 306,
11303 0,
11304 0, /* number */
11305 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11306 MY_UTF8MB4, /* csname */
11307 MY_UTF8MB4 "_ru_0900_ai_ci", /* m_coll_name */
11308 "", /* comment */
11309 "", /* tailoring */
11310 &ru_coll_param, /* coll_param */
11311 ctype_utf8, /* ctype */
11312 nullptr, /* to_lower */
11313 nullptr, /* to_upper */
11314 nullptr, /* sort_order */
11315 &my_uca_v900, /* uca */
11316 nullptr, /* tab_to_uni */
11317 nullptr, /* tab_from_uni */
11318 &my_unicase_unicode900, /* caseinfo */
11319 nullptr, /* state_map */
11320 nullptr, /* ident_map */
11321 0, /* strxfrm_multiply */
11322 1, /* caseup_multiply */
11323 1, /* casedn_multiply */
11324 1, /* mbminlen */
11325 4, /* mbmaxlen */
11326 1, /* mbmaxlenlen */
11327 32, /* min_sort_char */
11328 0x10FFFF, /* max_sort_char */
11329 ' ', /* pad char */
11330 false, /* escape_with_backslash_is_dangerous */
11331 1, /* levels_for_compare */
11332 &my_charset_utf8mb4_handler,
11333 &my_collation_uca_900_handler,
11334 NO_PAD};
11335
/*
  Collation descriptor named MY_UTF8MB4 "_ru_0900_as_cs", number 307.
  The tailoring is the empty string (not nullptr) and the entry is
  parameterised only through ru_coll_param. Adds MY_CS_CSSORT to the
  state flags, takes its UCA data from my_uca_v900, compares 3 levels
  and is NO_PAD. min_sort_char is 32 in this entry.
*/
11336 CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs = {
11337 307,
11338 0,
11339 0, /* number */
11340 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11341 MY_UTF8MB4, /* csname */
11342 MY_UTF8MB4 "_ru_0900_as_cs", /* m_coll_name */
11343 "", /* comment */
11344 "", /* tailoring */
11345 &ru_coll_param, /* coll_param */
11346 ctype_utf8, /* ctype */
11347 nullptr, /* to_lower */
11348 nullptr, /* to_upper */
11349 nullptr, /* sort_order */
11350 &my_uca_v900, /* uca */
11351 nullptr, /* tab_to_uni */
11352 nullptr, /* tab_from_uni */
11353 &my_unicase_unicode900, /* caseinfo */
11354 nullptr, /* state_map */
11355 nullptr, /* ident_map */
11356 0, /* strxfrm_multiply */
11357 1, /* caseup_multiply */
11358 1, /* casedn_multiply */
11359 1, /* mbminlen */
11360 4, /* mbmaxlen */
11361 1, /* mbmaxlenlen */
11362 32, /* min_sort_char */
11363 0x10FFFF, /* max_sort_char */
11364 ' ', /* pad char */
11365 false, /* escape_with_backslash_is_dangerous */
11366 3, /* levels_for_compare */
11367 &my_charset_utf8mb4_handler,
11368 &my_collation_uca_900_handler,
11369 NO_PAD};
11370
/*
  Collation descriptor named MY_UTF8MB4 "_zh_0900_as_cs".
  The three leading values (308, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is zh_cldr_30, coll_param points at zh_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
*/
11371 CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs = {
11372 308,
11373 0,
11374 0, /* number */
11375 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11376 MY_UTF8MB4, /* csname */
11377 MY_UTF8MB4 "_zh_0900_as_cs", /* m_coll_name */
11378 "", /* comment */
11379 zh_cldr_30, /* tailoring */
11380 &zh_coll_param, /* coll_param */
11381 ctype_utf8, /* ctype */
11382 nullptr, /* to_lower */
11383 nullptr, /* to_upper */
11384 nullptr, /* sort_order */
11385 &my_uca_v900, /* uca */
11386 nullptr, /* tab_to_uni */
11387 nullptr, /* tab_from_uni */
11388 &my_unicase_unicode900, /* caseinfo */
11389 nullptr, /* state_map */
11390 nullptr, /* ident_map */
11391 0, /* strxfrm_multiply */
11392 1, /* caseup_multiply */
11393 1, /* casedn_multiply */
11394 1, /* mbminlen */
11395 4, /* mbmaxlen */
11396 1, /* mbmaxlenlen */
11397 32, /* min_sort_char */
11398 0x10FFFF, /* max_sort_char */
11399 ' ', /* pad char */
11400 false, /* escape_with_backslash_is_dangerous */
11401 3, /* levels_for_compare */
11402 &my_charset_utf8mb4_handler, /* charset handler */
11403 &my_collation_uca_900_handler, /* collation handler */
11404 NO_PAD}; /* pad attribute */
11405
11406 /*
11407 Comparing the UTF-8 representation automatically yields codepoint order,
11408 so we can just do a binary comparison. Note that
11409 my_strnxfrm_unicode_full_bin() chooses to transform to UCS before collation;
11410 this is purely for legacy reasons and is not needed here.

The sort key is therefore the source bytes themselves: at most
min(srclen, dstlen) bytes of src are copied to dst unchanged.

@param cs        unused
@param dst       output buffer
@param dstlen    size of dst in bytes
@param nweights  unused
@param src       input bytes; must not be null (asserted)
@param srclen    number of input bytes
@param flags     if MY_STRXFRM_PAD_TO_MAXLEN is set, the remainder of dst
                 is zero-filled

@return dstlen when MY_STRXFRM_PAD_TO_MAXLEN is set, otherwise the number
        of bytes copied.
11411 */
11412 3343047 static size_t my_strnxfrm_utf8mb4_0900_bin(const CHARSET_INFO *cs
11413 [[maybe_unused]],
11414 uchar *dst, size_t dstlen,
11415 uint nweights [[maybe_unused]],
11416 const uchar *src, size_t srclen,
11417 uint flags) {
11418
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3343047 times.
3343047 assert(src);
11419
/* Never copy more than the destination can hold; longer input is truncated. */
11420 3343047 size_t weight_len = std::min<size_t>(srclen, dstlen);
11421 3343047 memcpy(dst, src, weight_len);
11422
2/2
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 3342969 times.
3343047 if (flags & MY_STRXFRM_PAD_TO_MAXLEN) {
/* Zero-fill the unused tail so the key occupies exactly dstlen bytes. */
11423 78 memset(dst + weight_len, 0, dstlen - weight_len);
11424 78 return dstlen;
11425 } else {
11426 3342969 return weight_len;
11427 }
11428 }
11429
/*
  strnncollsp entry installed in my_collation_utf8mb4_0900_bin_handler.
  Forwards all arguments to my_strnncoll_mb_bin() with the trailing
  argument fixed to false and returns that call's result unchanged.
  NOTE(review): the meaning of the trailing bool is defined by
  my_strnncoll_mb_bin(), which is not visible here -- confirm there.
*/
11430 1377502 static int my_strnncollsp_utf8mb4_0900_bin(const CHARSET_INFO *cs,
11431 const uchar *s, size_t slen,
11432 const uchar *t, size_t tlen) {
11433 1377502 return my_strnncoll_mb_bin(cs, s, slen, t, tlen, false);
11434 }
11435
/*
  Collation handler table referenced by my_charset_utf8mb4_0900_bin.
  The first two slots are left null. Two entries,
  my_strnncollsp_utf8mb4_0900_bin and my_strnxfrm_utf8mb4_0900_bin, are
  the functions defined just above; every other entry is not defined in
  this part of the file.
  NOTE(review): slot names other than "init" are not visible here; check
  the MY_COLLATION_HANDLER declaration before reordering anything.
*/
11436 static MY_COLLATION_HANDLER my_collation_utf8mb4_0900_bin_handler = {
11437 nullptr, /* init */
11438 nullptr,
11439 my_strnncoll_mb_bin,
11440 my_strnncollsp_utf8mb4_0900_bin, /* defined above */
11441 my_strnxfrm_utf8mb4_0900_bin, /* defined above */
11442 my_strnxfrmlen_simple,
11443 my_like_range_mb,
11444 my_wildcmp_mb_bin,
11445 my_strcasecmp_mb_bin,
11446 my_instr_mb,
11447 my_hash_sort_mb_bin,
11448 my_propagate_simple};
11449
// Collation descriptor named MY_UTF8MB4 "_0900_bin".
// The three leading values (309, 0, 0) all sit under the single "number"
// label. The state ORs MY_CS_BINSORT into MY_CS_UTF8MB4_UCA_FLAGS.
// Tailoring, coll_param and uca are all null, strxfrm_multiply is 1 and
// min_sort_char is 0. The collation handler is
// my_collation_utf8mb4_0900_bin_handler and the pad attribute is NO_PAD.
11450 CHARSET_INFO my_charset_utf8mb4_0900_bin = {
11451 309,
11452 0,
11453 0, // number
11454 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_BINSORT, // state
11455 MY_UTF8MB4, // cs name
11456 MY_UTF8MB4 "_0900_bin", // name
11457 "", // comment
11458 nullptr, // tailoring
11459 nullptr, // coll_param
11460 ctype_utf8, // ctype
11461 nullptr, // to_lower
11462 nullptr, // to_upper
11463 nullptr, // sort_order
11464 nullptr, // uca
11465 nullptr, // tab_to_uni
11466 nullptr, // tab_from_uni
11467 &my_unicase_unicode900, // caseinfo
11468 nullptr, // state_map
11469 nullptr, // ident_map
11470 1, // strxfrm_multiply
11471 1, // caseup_multiply
11472 1, // casedn_multiply
11473 1, // mbminlen
11474 4, // mbmaxlen
11475 1, // mbmaxlenlen
11476 0, // min_sort_char
11477 0x10FFFF, // max_sort_char
11478 ' ', // pad char
11479 false, // escape_with_backslash_is_dangerous
11480 1, // levels_for_compare
11481 &my_charset_utf8mb4_handler, // charset handler
11482 &my_collation_utf8mb4_0900_bin_handler, // collation handler
11483 NO_PAD}; // pad attribute
11484
/*
  Collation descriptor named MY_UTF8MB4 "_nb_0900_ai_ci".
  The three leading values (310, 0, 0) all sit under the single "number"
  label. The tailoring is da_cldr_30, coll_param is null,
  levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "nb" collation uses the "da"-named tailoring;
  presumably the rules are shared on purpose -- confirm.
*/
11485 CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci = {
11486 310,
11487 0,
11488 0, /* number */
11489 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11490 MY_UTF8MB4, /* csname */
11491 MY_UTF8MB4 "_nb_0900_ai_ci", /* name */
11492 "", /* comment */
11493 da_cldr_30, /* tailoring */
11494 nullptr, /* coll_param */
11495 ctype_utf8, /* ctype */
11496 nullptr, /* to_lower */
11497 nullptr, /* to_upper */
11498 nullptr, /* sort_order */
11499 &my_uca_v900, /* uca */
11500 nullptr, /* tab_to_uni */
11501 nullptr, /* tab_from_uni */
11502 &my_unicase_unicode900, /* caseinfo */
11503 nullptr, /* state_map */
11504 nullptr, /* ident_map */
11505 0, /* strxfrm_multiply */
11506 1, /* caseup_multiply */
11507 1, /* casedn_multiply */
11508 1, /* mbminlen */
11509 4, /* mbmaxlen */
11510 1, /* mbmaxlenlen */
11511 9, /* min_sort_char */
11512 0x10FFFF, /* max_sort_char */
11513 ' ', /* pad char */
11514 false, /* escape_with_backslash_is_dangerous */
11515 1, /* levels_for_compare */
11516 &my_charset_utf8mb4_handler, /* charset handler */
11517 &my_collation_uca_900_handler, /* collation handler */
11518 NO_PAD}; /* pad attribute */
11519
/*
  Collation descriptor named MY_UTF8MB4 "_nb_0900_as_cs".
  The three leading values (311, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is da_cldr_30, coll_param points at no_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "nb" collation uses the "da"-named tailoring;
  presumably the rules are shared on purpose -- confirm.
*/
11520 CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs = {
11521 311,
11522 0,
11523 0, /* number */
11524 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11525 MY_UTF8MB4, /* csname */
11526 MY_UTF8MB4 "_nb_0900_as_cs", /* name */
11527 "", /* comment */
11528 da_cldr_30, /* tailoring */
11529 &no_coll_param, /* coll_param */
11530 ctype_utf8, /* ctype */
11531 nullptr, /* to_lower */
11532 nullptr, /* to_upper */
11533 nullptr, /* sort_order */
11534 &my_uca_v900, /* uca */
11535 nullptr, /* tab_to_uni */
11536 nullptr, /* tab_from_uni */
11537 &my_unicase_unicode900, /* caseinfo */
11538 nullptr, /* state_map */
11539 nullptr, /* ident_map */
11540 0, /* strxfrm_multiply */
11541 1, /* caseup_multiply */
11542 1, /* casedn_multiply */
11543 1, /* mbminlen */
11544 4, /* mbmaxlen */
11545 1, /* mbmaxlenlen */
11546 9, /* min_sort_char */
11547 0x10FFFF, /* max_sort_char */
11548 ' ', /* pad char */
11549 false, /* escape_with_backslash_is_dangerous */
11550 3, /* levels_for_compare */
11551 &my_charset_utf8mb4_handler, /* charset handler */
11552 &my_collation_uca_900_handler, /* collation handler */
11553 NO_PAD}; /* pad attribute */
11554
/*
  Collation descriptor named MY_UTF8MB4 "_nn_0900_ai_ci".
  The three leading values (312, 0, 0) all sit under the single "number"
  label. The tailoring is da_cldr_30, coll_param is null,
  levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "nn" collation uses the "da"-named tailoring;
  presumably the rules are shared on purpose -- confirm.
*/
11555 CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci = {
11556 312,
11557 0,
11558 0, /* number */
11559 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11560 MY_UTF8MB4, /* csname */
11561 MY_UTF8MB4 "_nn_0900_ai_ci", /* name */
11562 "", /* comment */
11563 da_cldr_30, /* tailoring */
11564 nullptr, /* coll_param */
11565 ctype_utf8, /* ctype */
11566 nullptr, /* to_lower */
11567 nullptr, /* to_upper */
11568 nullptr, /* sort_order */
11569 &my_uca_v900, /* uca */
11570 nullptr, /* tab_to_uni */
11571 nullptr, /* tab_from_uni */
11572 &my_unicase_unicode900, /* caseinfo */
11573 nullptr, /* state_map */
11574 nullptr, /* ident_map */
11575 0, /* strxfrm_multiply */
11576 1, /* caseup_multiply */
11577 1, /* casedn_multiply */
11578 1, /* mbminlen */
11579 4, /* mbmaxlen */
11580 1, /* mbmaxlenlen */
11581 9, /* min_sort_char */
11582 0x10FFFF, /* max_sort_char */
11583 ' ', /* pad char */
11584 false, /* escape_with_backslash_is_dangerous */
11585 1, /* levels_for_compare */
11586 &my_charset_utf8mb4_handler, /* charset handler */
11587 &my_collation_uca_900_handler, /* collation handler */
11588 NO_PAD}; /* pad attribute */
11589
/*
  Collation descriptor named MY_UTF8MB4 "_nn_0900_as_cs".
  The three leading values (313, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is da_cldr_30, coll_param points at no_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "nn" collation uses the "da"-named tailoring;
  presumably the rules are shared on purpose -- confirm.
*/
11590 CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs = {
11591 313,
11592 0,
11593 0, /* number */
11594 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11595 MY_UTF8MB4, /* csname */
11596 MY_UTF8MB4 "_nn_0900_as_cs", /* name */
11597 "", /* comment */
11598 da_cldr_30, /* tailoring */
11599 &no_coll_param, /* coll_param */
11600 ctype_utf8, /* ctype */
11601 nullptr, /* to_lower */
11602 nullptr, /* to_upper */
11603 nullptr, /* sort_order */
11604 &my_uca_v900, /* uca */
11605 nullptr, /* tab_to_uni */
11606 nullptr, /* tab_from_uni */
11607 &my_unicase_unicode900, /* caseinfo */
11608 nullptr, /* state_map */
11609 nullptr, /* ident_map */
11610 0, /* strxfrm_multiply */
11611 1, /* caseup_multiply */
11612 1, /* casedn_multiply */
11613 1, /* mbminlen */
11614 4, /* mbmaxlen */
11615 1, /* mbmaxlenlen */
11616 9, /* min_sort_char */
11617 0x10FFFF, /* max_sort_char */
11618 ' ', /* pad char */
11619 false, /* escape_with_backslash_is_dangerous */
11620 3, /* levels_for_compare */
11621 &my_charset_utf8mb4_handler, /* charset handler */
11622 &my_collation_uca_900_handler, /* collation handler */
11623 NO_PAD}; /* pad attribute */
11624
/*
  Collation descriptor named MY_UTF8MB4 "_sr_latn_0900_ai_ci".
  The three leading values (314, 0, 0) all sit under the single "number"
  label. The tailoring is hr_cldr_30, coll_param points at hr_coll_param,
  levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "sr_latn" collation uses the "hr"-named tailoring and
  parameters; presumably shared on purpose -- confirm.
*/
11625 CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci = {
11626 314,
11627 0,
11628 0, /* number */
11629 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11630 MY_UTF8MB4, /* csname */
11631 MY_UTF8MB4 "_sr_latn_0900_ai_ci", /* name */
11632 "", /* comment */
11633 hr_cldr_30, /* tailoring */
11634 &hr_coll_param, /* coll_param */
11635 ctype_utf8, /* ctype */
11636 nullptr, /* to_lower */
11637 nullptr, /* to_upper */
11638 nullptr, /* sort_order */
11639 &my_uca_v900, /* uca */
11640 nullptr, /* tab_to_uni */
11641 nullptr, /* tab_from_uni */
11642 &my_unicase_unicode900, /* caseinfo */
11643 nullptr, /* state_map */
11644 nullptr, /* ident_map */
11645 0, /* strxfrm_multiply */
11646 1, /* caseup_multiply */
11647 1, /* casedn_multiply */
11648 1, /* mbminlen */
11649 4, /* mbmaxlen */
11650 1, /* mbmaxlenlen */
11651 9, /* min_sort_char */
11652 0x10FFFF, /* max_sort_char */
11653 ' ', /* pad char */
11654 false, /* escape_with_backslash_is_dangerous */
11655 1, /* levels_for_compare */
11656 &my_charset_utf8mb4_handler, /* charset handler */
11657 &my_collation_uca_900_handler, /* collation handler */
11658 NO_PAD}; /* pad attribute */
11659
/*
  Collation descriptor named MY_UTF8MB4 "_sr_latn_0900_as_cs".
  The three leading values (315, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is hr_cldr_30, coll_param points at hr_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "sr_latn" collation uses the "hr"-named tailoring and
  parameters; presumably shared on purpose -- confirm.
*/
11660 CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs = {
11661 315,
11662 0,
11663 0, /* number */
11664 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11665 MY_UTF8MB4, /* csname */
11666 MY_UTF8MB4 "_sr_latn_0900_as_cs", /* name */
11667 "", /* comment */
11668 hr_cldr_30, /* tailoring */
11669 &hr_coll_param, /* coll_param */
11670 ctype_utf8, /* ctype */
11671 nullptr, /* to_lower */
11672 nullptr, /* to_upper */
11673 nullptr, /* sort_order */
11674 &my_uca_v900, /* uca */
11675 nullptr, /* tab_to_uni */
11676 nullptr, /* tab_from_uni */
11677 &my_unicase_unicode900, /* caseinfo */
11678 nullptr, /* state_map */
11679 nullptr, /* ident_map */
11680 0, /* strxfrm_multiply */
11681 1, /* caseup_multiply */
11682 1, /* casedn_multiply */
11683 1, /* mbminlen */
11684 4, /* mbmaxlen */
11685 1, /* mbmaxlenlen */
11686 9, /* min_sort_char */
11687 0x10FFFF, /* max_sort_char */
11688 ' ', /* pad char */
11689 false, /* escape_with_backslash_is_dangerous */
11690 3, /* levels_for_compare */
11691 &my_charset_utf8mb4_handler, /* charset handler */
11692 &my_collation_uca_900_handler, /* collation handler */
11693 NO_PAD}; /* pad attribute */
11694
/*
  Collation descriptor named MY_UTF8MB4 "_bs_0900_ai_ci".
  The three leading values (316, 0, 0) all sit under the single "number"
  label. The tailoring is hr_cldr_30, coll_param points at hr_coll_param,
  levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "bs" collation uses the "hr"-named tailoring and
  parameters; presumably shared on purpose -- confirm.
*/
11695 CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci = {
11696 316,
11697 0,
11698 0, /* number */
11699 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11700 MY_UTF8MB4, /* csname */
11701 MY_UTF8MB4 "_bs_0900_ai_ci", /* name */
11702 "", /* comment */
11703 hr_cldr_30, /* tailoring */
11704 &hr_coll_param, /* coll_param */
11705 ctype_utf8, /* ctype */
11706 nullptr, /* to_lower */
11707 nullptr, /* to_upper */
11708 nullptr, /* sort_order */
11709 &my_uca_v900, /* uca */
11710 nullptr, /* tab_to_uni */
11711 nullptr, /* tab_from_uni */
11712 &my_unicase_unicode900, /* caseinfo */
11713 nullptr, /* state_map */
11714 nullptr, /* ident_map */
11715 0, /* strxfrm_multiply */
11716 1, /* caseup_multiply */
11717 1, /* casedn_multiply */
11718 1, /* mbminlen */
11719 4, /* mbmaxlen */
11720 1, /* mbmaxlenlen */
11721 9, /* min_sort_char */
11722 0x10FFFF, /* max_sort_char */
11723 ' ', /* pad char */
11724 false, /* escape_with_backslash_is_dangerous */
11725 1, /* levels_for_compare */
11726 &my_charset_utf8mb4_handler, /* charset handler */
11727 &my_collation_uca_900_handler, /* collation handler */
11728 NO_PAD}; /* pad attribute */
11729
/*
  Collation descriptor named MY_UTF8MB4 "_bs_0900_as_cs".
  The three leading values (317, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is hr_cldr_30, coll_param points at hr_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "bs" collation uses the "hr"-named tailoring and
  parameters; presumably shared on purpose -- confirm.
*/
11730 CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs = {
11731 317,
11732 0,
11733 0, /* number */
11734 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11735 MY_UTF8MB4, /* csname */
11736 MY_UTF8MB4 "_bs_0900_as_cs", /* name */
11737 "", /* comment */
11738 hr_cldr_30, /* tailoring */
11739 &hr_coll_param, /* coll_param */
11740 ctype_utf8, /* ctype */
11741 nullptr, /* to_lower */
11742 nullptr, /* to_upper */
11743 nullptr, /* sort_order */
11744 &my_uca_v900, /* uca */
11745 nullptr, /* tab_to_uni */
11746 nullptr, /* tab_from_uni */
11747 &my_unicase_unicode900, /* caseinfo */
11748 nullptr, /* state_map */
11749 nullptr, /* ident_map */
11750 0, /* strxfrm_multiply */
11751 1, /* caseup_multiply */
11752 1, /* casedn_multiply */
11753 1, /* mbminlen */
11754 4, /* mbmaxlen */
11755 1, /* mbmaxlenlen */
11756 9, /* min_sort_char */
11757 0x10FFFF, /* max_sort_char */
11758 ' ', /* pad char */
11759 false, /* escape_with_backslash_is_dangerous */
11760 3, /* levels_for_compare */
11761 &my_charset_utf8mb4_handler, /* charset handler */
11762 &my_collation_uca_900_handler, /* collation handler */
11763 NO_PAD}; /* pad attribute */
11764
/*
  Collation descriptor named MY_UTF8MB4 "_bg_0900_ai_ci".
  The three leading values (318, 0, 0) all sit under the single "number"
  label. The tailoring string is empty, coll_param points at
  ru_coll_param, levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "bg" collation uses the "ru"-named parameters;
  presumably shared on purpose -- confirm.
*/
11765 CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci = {
11766 318,
11767 0,
11768 0, /* number */
11769 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11770 MY_UTF8MB4, /* csname */
11771 MY_UTF8MB4 "_bg_0900_ai_ci", /* name */
11772 "", /* comment */
11773 "", /* tailoring */
11774 &ru_coll_param, /* coll_param */
11775 ctype_utf8, /* ctype */
11776 nullptr, /* to_lower */
11777 nullptr, /* to_upper */
11778 nullptr, /* sort_order */
11779 &my_uca_v900, /* uca */
11780 nullptr, /* tab_to_uni */
11781 nullptr, /* tab_from_uni */
11782 &my_unicase_unicode900, /* caseinfo */
11783 nullptr, /* state_map */
11784 nullptr, /* ident_map */
11785 0, /* strxfrm_multiply */
11786 1, /* caseup_multiply */
11787 1, /* casedn_multiply */
11788 1, /* mbminlen */
11789 4, /* mbmaxlen */
11790 1, /* mbmaxlenlen */
11791 32, /* min_sort_char */
11792 0x10FFFF, /* max_sort_char */
11793 ' ', /* pad char */
11794 false, /* escape_with_backslash_is_dangerous */
11795 1, /* levels_for_compare */
11796 &my_charset_utf8mb4_handler, /* charset handler */
11797 &my_collation_uca_900_handler, /* collation handler */
11798 NO_PAD}; /* pad attribute */
11799
/*
  Collation descriptor named MY_UTF8MB4 "_bg_0900_as_cs".
  The three leading values (319, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring string is empty, coll_param points at ru_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "bg" collation uses the "ru"-named parameters;
  presumably shared on purpose -- confirm.
*/
11800 CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs = {
11801 319,
11802 0,
11803 0, /* number */
11804 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11805 MY_UTF8MB4, /* csname */
11806 MY_UTF8MB4 "_bg_0900_as_cs", /* name */
11807 "", /* comment */
11808 "", /* tailoring */
11809 &ru_coll_param, /* coll_param */
11810 ctype_utf8, /* ctype */
11811 nullptr, /* to_lower */
11812 nullptr, /* to_upper */
11813 nullptr, /* sort_order */
11814 &my_uca_v900, /* uca */
11815 nullptr, /* tab_to_uni */
11816 nullptr, /* tab_from_uni */
11817 &my_unicase_unicode900, /* caseinfo */
11818 nullptr, /* state_map */
11819 nullptr, /* ident_map */
11820 0, /* strxfrm_multiply */
11821 1, /* caseup_multiply */
11822 1, /* casedn_multiply */
11823 1, /* mbminlen */
11824 4, /* mbmaxlen */
11825 1, /* mbmaxlenlen */
11826 32, /* min_sort_char */
11827 0x10FFFF, /* max_sort_char */
11828 ' ', /* pad char */
11829 false, /* escape_with_backslash_is_dangerous */
11830 3, /* levels_for_compare */
11831 &my_charset_utf8mb4_handler, /* charset handler */
11832 &my_collation_uca_900_handler, /* collation handler */
11833 NO_PAD}; /* pad attribute */
11834
/*
  Collation descriptor named MY_UTF8MB4 "_gl_0900_ai_ci".
  The three leading values (320, 0, 0) all sit under the single "number"
  label. The tailoring is the identifier `spanish`, coll_param is null,
  levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "gl" collation uses the `spanish` tailoring;
  presumably shared on purpose -- confirm.
*/
11835 CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci = {
11836 320,
11837 0,
11838 0, /* number */
11839 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11840 MY_UTF8MB4, /* csname */
11841 MY_UTF8MB4 "_gl_0900_ai_ci", /* name */
11842 "", /* comment */
11843 spanish, /* tailoring */
11844 nullptr, /* coll_param */
11845 ctype_utf8, /* ctype */
11846 nullptr, /* to_lower */
11847 nullptr, /* to_upper */
11848 nullptr, /* sort_order */
11849 &my_uca_v900, /* uca */
11850 nullptr, /* tab_to_uni */
11851 nullptr, /* tab_from_uni */
11852 &my_unicase_unicode900, /* caseinfo */
11853 nullptr, /* state_map */
11854 nullptr, /* ident_map */
11855 0, /* strxfrm_multiply */
11856 1, /* caseup_multiply */
11857 1, /* casedn_multiply */
11858 1, /* mbminlen */
11859 4, /* mbmaxlen */
11860 1, /* mbmaxlenlen */
11861 9, /* min_sort_char */
11862 0x10FFFF, /* max_sort_char */
11863 ' ', /* pad char */
11864 false, /* escape_with_backslash_is_dangerous */
11865 1, /* levels_for_compare */
11866 &my_charset_utf8mb4_handler, /* charset handler */
11867 &my_collation_uca_900_handler, /* collation handler */
11868 NO_PAD}; /* pad attribute */
11869
/*
  Collation descriptor named MY_UTF8MB4 "_gl_0900_as_cs".
  The three leading values (321, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring is the identifier `spanish`, coll_param is null,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "gl" collation uses the `spanish` tailoring;
  presumably shared on purpose -- confirm.
*/
11870 CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs = {
11871 321,
11872 0,
11873 0, /* number */
11874 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11875 MY_UTF8MB4, /* csname */
11876 MY_UTF8MB4 "_gl_0900_as_cs", /* name */
11877 "", /* comment */
11878 spanish, /* tailoring */
11879 nullptr, /* coll_param */
11880 ctype_utf8, /* ctype */
11881 nullptr, /* to_lower */
11882 nullptr, /* to_upper */
11883 nullptr, /* sort_order */
11884 &my_uca_v900, /* uca */
11885 nullptr, /* tab_to_uni */
11886 nullptr, /* tab_from_uni */
11887 &my_unicase_unicode900, /* caseinfo */
11888 nullptr, /* state_map */
11889 nullptr, /* ident_map */
11890 0, /* strxfrm_multiply */
11891 1, /* caseup_multiply */
11892 1, /* casedn_multiply */
11893 1, /* mbminlen */
11894 4, /* mbmaxlen */
11895 1, /* mbmaxlenlen */
11896 9, /* min_sort_char */
11897 0x10FFFF, /* max_sort_char */
11898 ' ', /* pad char */
11899 false, /* escape_with_backslash_is_dangerous */
11900 3, /* levels_for_compare */
11901 &my_charset_utf8mb4_handler, /* charset handler */
11902 &my_collation_uca_900_handler, /* collation handler */
11903 NO_PAD}; /* pad attribute */
11904
/*
  Collation descriptor named MY_UTF8MB4 "_mn_cyrl_0900_ai_ci".
  The three leading values (322, 0, 0) all sit under the single "number"
  label. The tailoring string is empty, coll_param points at
  ru_coll_param, levels_for_compare is 1 and the pad attribute is NO_PAD.
  NOTE(review): the "mn_cyrl" collation uses the "ru"-named parameters;
  presumably shared on purpose -- confirm.
*/
11905 CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci = {
11906 322,
11907 0,
11908 0, /* number */
11909 MY_CS_UTF8MB4_UCA_FLAGS, /* state */
11910 MY_UTF8MB4, /* csname */
11911 MY_UTF8MB4 "_mn_cyrl_0900_ai_ci", /* name */
11912 "", /* comment */
11913 "", /* tailoring */
11914 &ru_coll_param, /* coll_param */
11915 ctype_utf8, /* ctype */
11916 nullptr, /* to_lower */
11917 nullptr, /* to_upper */
11918 nullptr, /* sort_order */
11919 &my_uca_v900, /* uca */
11920 nullptr, /* tab_to_uni */
11921 nullptr, /* tab_from_uni */
11922 &my_unicase_unicode900, /* caseinfo */
11923 nullptr, /* state_map */
11924 nullptr, /* ident_map */
11925 0, /* strxfrm_multiply */
11926 1, /* caseup_multiply */
11927 1, /* casedn_multiply */
11928 1, /* mbminlen */
11929 4, /* mbmaxlen */
11930 1, /* mbmaxlenlen */
11931 32, /* min_sort_char */
11932 0x10FFFF, /* max_sort_char */
11933 ' ', /* pad char */
11934 false, /* escape_with_backslash_is_dangerous */
11935 1, /* levels_for_compare */
11936 &my_charset_utf8mb4_handler, /* charset handler */
11937 &my_collation_uca_900_handler, /* collation handler */
11938 NO_PAD}; /* pad attribute */
11939
/*
  Collation descriptor named MY_UTF8MB4 "_mn_cyrl_0900_as_cs".
  The three leading values (323, 0, 0) all sit under the single "number"
  label. The state ORs MY_CS_CSSORT into MY_CS_UTF8MB4_UCA_FLAGS, the
  tailoring string is empty, coll_param points at ru_coll_param,
  levels_for_compare is 3 and the pad attribute is NO_PAD.
  NOTE(review): the "mn_cyrl" collation uses the "ru"-named parameters;
  presumably shared on purpose -- confirm.
*/
11940 CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs = {
11941 323,
11942 0,
11943 0, /* number */
11944 MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */
11945 MY_UTF8MB4, /* csname */
11946 MY_UTF8MB4 "_mn_cyrl_0900_as_cs", /* name */
11947 "", /* comment */
11948 "", /* tailoring */
11949 &ru_coll_param, /* coll_param */
11950 ctype_utf8, /* ctype */
11951 nullptr, /* to_lower */
11952 nullptr, /* to_upper */
11953 nullptr, /* sort_order */
11954 &my_uca_v900, /* uca */
11955 nullptr, /* tab_to_uni */
11956 nullptr, /* tab_from_uni */
11957 &my_unicase_unicode900, /* caseinfo */
11958 nullptr, /* state_map */
11959 nullptr, /* ident_map */
11960 0, /* strxfrm_multiply */
11961 1, /* caseup_multiply */
11962 1, /* casedn_multiply */
11963 1, /* mbminlen */
11964 4, /* mbmaxlen */
11965 1, /* mbmaxlenlen */
11966 32, /* min_sort_char */
11967 0x10FFFF, /* max_sort_char */
11968 ' ', /* pad char */
11969 false, /* escape_with_backslash_is_dangerous */
11970 3, /* levels_for_compare */
11971 &my_charset_utf8mb4_handler, /* charset handler */
11972 &my_collation_uca_900_handler, /* collation handler */
11973 NO_PAD}; /* pad attribute */
11974